获取汉字笔顺并形成笔顺图片

JaySun520 · 发表于 2023-7-24 14:01

由于不熟悉汉字的笔顺，所以利用 Python 爬取百度汉语上的笔顺信息，并生成对应的笔顺图片。

[Python] 纯文本查看 复制代码

# -!- coding: utf-8 -!-
import re
import urllib.request
import urllib.parse
import time
import os

from lxml import etree
import pandas as pd
from openpyxl import load_workbook

#将SVG路径转换为png图片
from cairosvg import svg2png
import tkinter as tk
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
from tkinter.filedialog import asksaveasfilename
from tkinter.ttk import *

# Root folder for generated files. It ends with '/' because the functions
# below build per-character paths by plain string concatenation
# (OUTPUTADDRESS + chinese).
OUTPUTADDRESS = 'D:/汉字笔画/'

def select_db_file(self):
    """Ask the user for the input ``.xlsx`` workbook and remember its path.

    Args:
        self: Object exposing a ``db`` variable with a ``set`` method. The GUI
            passes its Tk root window, which carries a ``tk.StringVar``.

    When the dialog is cancelled the stored path is left untouched, so an
    earlier selection is not wiped out.
    """
    db_file = askopenfilename(title="请选择汉字文件",filetypes=[('xlsx', '*.xlsx')])
    # The dialog yields an empty value when it is dismissed without a choice.
    if db_file:
        self.db.set(db_file)

def select_save_file(self):
    """Ask the user for the output folder and remember it with a trailing '/'.

    Args:
        self: Object exposing an ``f`` variable with a ``set`` method. The GUI
            passes its Tk root window, which carries a ``tk.StringVar``.

    When the dialog is cancelled the stored folder is left untouched; without
    this guard a cancel would store the bare string '/'.
    """
    save_directory = askdirectory(initialdir=OUTPUTADDRESS)
    # The dialog yields an empty value when it is dismissed without a choice.
    if save_directory:
        # Strip first so a root folder such as 'D:/' does not become 'D://'.
        self.f.set(save_directory.rstrip('/') + '/')

class SelectPage:
    """Single-window GUI that collects the settings and starts the generator.

    The Tk root created in ``createWidgets`` doubles as the settings object
    handed to the worker functions. It carries four ``tk.StringVar``
    attributes: ``font_color``, ``font_color_last``, ``db`` (input workbook
    path) and ``f`` (output folder).
    """

    def __init__(self, parent_window):
        """Destroy ``parent_window`` and show the settings window.

        The call only returns once the window is closed, because
        ``createWidgets`` ends by running the Tk main loop.
        """
        parent_window.destroy()  # discard the window handed in by the caller
        super(SelectPage, self).__init__()
        self.createWidgets()

    def createWidgets(self):
        """Build every widget, wire the buttons and run the Tk main loop."""
        self.window = tk.Tk()
        self.window.title('笔顺生成工具------Design By xxx')

        # Row 1: fill colour for every stroke except the last one.
        font_color_label = Label(self.window, font=('微软雅黑', 10), text='字体颜色', justify='right')
        font_color_label.grid(row=1, column=1, padx=(2, 0), pady=(2, 0), sticky='WE')

        font_color_label2 = Label(self.window, font=('微软雅黑', 6), text='(黑色:#000000；灰色:#B8B8B8)', justify='right', background='lightskyblue')
        font_color_label2.grid(row=1, column=2, padx=(2, 0), pady=(2, 0), sticky='WE')

        # Variables are bound to this window explicitly instead of relying on
        # tkinter's implicit default root.
        self.window.font_color = tk.StringVar(master=self.window, value='#B8B8B8')
        font_color_entry = Entry(self.window, width=80, textvariable=self.window.font_color)
        font_color_entry.grid(row=1, column=3, padx=3, pady=3, sticky='WE')

        # Row 2: fill colour for the last stroke of each step image.
        font_color_last_label = Label(self.window, font=('微软雅黑', 10), text='最后一笔颜色', justify='right')
        font_color_last_label.grid(row=2, column=1, padx=(2, 0), pady=(2, 0), sticky='WE')

        font_color_last_label2 = Label(self.window, font=('微软雅黑', 6), text='(白色:#FFFFFF；红色:#FF1111)', justify='right', background='lightskyblue')
        font_color_last_label2.grid(row=2, column=2, padx=(2, 0), pady=(2, 0), sticky='WE')

        self.window.font_color_last = tk.StringVar(master=self.window, value='#B8B8B8')
        font_color_last_entry = Entry(self.window, width=80, textvariable=self.window.font_color_last)
        font_color_last_entry.grid(row=2, column=3, padx=3, pady=3, sticky='WE')

        # Row 3: path of the workbook that lists the characters to crawl.
        self.window.db = tk.StringVar(master=self.window)
        db_select = Button(self.window, text='汉字文件', command=lambda: select_db_file(self.window))
        db_select.grid(row=3, column=1, columnspan=2, sticky='W', padx=(2, 0), pady=(2, 0))

        ExcelFile_path = Entry(self.window, width=80, textvariable=self.window.db)
        ExcelFile_path['state'] = 'readonly'  # filled through the file dialog only
        ExcelFile_path.grid(row=3, column=3, padx=3, pady=3, sticky='WE')

        # Row 4: folder that receives the downloaded files.
        self.window.f = tk.StringVar(master=self.window)
        save_path = Button(self.window, text='笔顺保存地址', command=lambda: select_save_file(self.window))
        save_path.grid(row=4, column=1, columnspan=2, sticky='W', padx=(2, 0), pady=(2, 0))

        ExcelOutputFile_path = Entry(self.window, width=80, textvariable=self.window.f)
        ExcelOutputFile_path['state'] = 'readonly'  # filled through the folder dialog only
        ExcelOutputFile_path.grid(row=4, column=3, padx=3, pady=3, sticky='WE')

        # Row 5: start button. The paths are read when the button is pressed,
        # not when the window is built.
        ExcelFile_sheetName = '生字表（一上）'
        create_btn = Button(self.window, text='生成笔顺', command=lambda: createBiShun(self.window, self.window.db.get(), ExcelFile_sheetName, self.window.f.get()))
        create_btn.grid(row=5, column=1, columnspan=3, pady=(0, 2))
        self.window.columnconfigure(2, weight=1)
        self.window.mainloop()

    def get_font_color(self):
        """Return the current text of the stroke colour entry."""
        return self.window.font_color.get()

# Render the complete character (all strokes in one colour) as a PNG.
def ChineseChangeSVG2png(windows,svg_path,chinese):
    """Render every stroke of ``chinese`` in black and save it as a PNG.

    Args:
        windows: Unused; kept so the signature matches ``ChangeSVG2png``.
        svg_path: Iterable of SVG path ``d`` strings, one per stroke.
        chinese: The character; used as folder name and file-name prefix.

    The image is written to ``OUTPUTADDRESS + chinese + '/' + chinese +
    '0.png'`` (the full character uses the suffix 0). Rendering errors are
    printed and otherwise ignored.
    """
    outputpath = OUTPUTADDRESS + chinese
    # makedirs also creates OUTPUTADDRESS itself on first use and does not
    # fail when the per-character folder already exists.
    os.makedirs(outputpath, exist_ok=True)

    fill = '#000000'  # black
    svg_code = [
        '<svg style="width: 83px; height:83px;" xmlns="http://www.w3.org/2000/svg">',
        # Scale the glyph down and flip the y axis into the 83px canvas.
        '    <g transform="translate(3, 70) scale(0.07, -0.07)">',
    ]
    svg_code.extend(
        '        <path d="' + stroke + '" style="fill: ' + fill + ';"></path>'
        for stroke in svg_path
    )
    svg_code.append('    </g>')
    svg_code.append('</svg>')

    output_filename = outputpath + '/' + chinese + '0.png'
    try:
        svg2png(bytestring='\n'.join(svg_code), write_to=output_filename)
    except Exception as e:
        # str() is required: concatenating the exception object itself would
        # raise TypeError inside this handler.
        print('error:' + str(e))

def craw_pinyin(chinese):
    """Fetch the pinyin of ``chinese`` from hanyu.baidu.com.

    NOTE: this file defines ``craw_pinyin`` a second time further down; Python
    binds the name to the later definition.

    Args:
        chinese: The character (or word) to look up.

    Returns:
        A dict with the keys ``"chinese"`` and ``"pinyin"``. ``"pinyin"`` is
        the readings joined with '、', or '' when the page has no pinyin
        section.

    The download is retried every 2 seconds until it succeeds.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character itself
        "pinyin": "",  # filled in below when available
    }

    while True:
        try:
            request = urllib.request.Request(url, headers=header)
            # The timeout turns a stalled connection into a retry instead of
            # an indefinite hang.
            reponse = urllib.request.urlopen(request, timeout=30).read()
            web_html = etree.HTML(str(reponse, 'utf-8'))
        except Exception as e:
            print(chinese + 'server error', e)
            time.sleep(2)
        else:
            break

    # Extract the pinyin; a page without that section keeps the '' default.
    try:
        pinyin = web_html.xpath('//div[@id="pinyin"]')[0]
        chinese_details['pinyin'] = '、'.join(pinyin.xpath('span/b/text()'))
    except Exception:
        print("无法获得" + chinese + "的属性:pinyin")

    # Read the value from the dict: a local set only on success would be
    # unbound here whenever the lookup above failed.
    print('读取汉字信息：' + chinese, chinese_details['pinyin'])
    return chinese_details


# Render one stroke-order step (the strokes written so far) as a PNG.
def ChangeSVG2png(windows,svg_path,chinese):
    """Render the strokes in ``svg_path`` and save them as one step image.

    Every stroke except the last is filled with ``windows.font_color``; the
    last one is filled with ``windows.font_color_last``.

    Args:
        windows: Object whose ``font_color`` and ``font_color_last``
            attributes provide ``get()`` returning an SVG colour string.
        svg_path: List of SVG path ``d`` strings for the strokes drawn so far.
        chinese: The character; used as folder name and file-name prefix.

    The image is written to ``OUTPUTADDRESS + chinese + '/' + chinese +
    str(len(svg_path)) + '.png'``. Rendering errors are printed and otherwise
    ignored.
    """
    outputpath = OUTPUTADDRESS + chinese
    # makedirs also creates OUTPUTADDRESS itself on first use and does not
    # fail when the per-character folder already exists.
    os.makedirs(outputpath, exist_ok=True)

    stroke_total = len(svg_path)
    body_fill = windows.font_color.get()
    last_fill = windows.font_color_last.get()

    svg_code = [
        '<svg style="width: 83px; height:83px;" xmlns="http://www.w3.org/2000/svg">',
        # Scale the glyph down and flip the y axis into the 83px canvas.
        '    <g transform="translate(3, 70) scale(0.07, -0.07)">',
    ]
    # Decide by position, not by list.index(): index() returns the first
    # match, so a final stroke identical to an earlier one would be coloured
    # like a middle stroke.
    for position, stroke in enumerate(svg_path, start=1):
        fill = body_fill if position < stroke_total else last_fill
        svg_code.append('        <path d="' + stroke + '" style="fill: ' + fill + ';"></path>')
    svg_code.append('    </g>')
    svg_code.append('</svg>')

    output_filename = outputpath + '/' + chinese + str(stroke_total) + '.png'
    try:
        svg2png(bytestring='\n'.join(svg_code), write_to=output_filename)
    except Exception as e:
        # str() is required: concatenating the exception object itself would
        # raise TypeError inside this handler.
        print('error:' + str(e))

def craw_pinyin(chinese):
    """Fetch the pinyin of ``chinese`` from hanyu.baidu.com.

    NOTE: this is the second definition of ``craw_pinyin`` in this file; it
    replaces the earlier one when the module is loaded.

    Args:
        chinese: The character (or word) to look up.

    Returns:
        A dict with the keys ``"chinese"`` and ``"pinyin"``. ``"pinyin"`` is
        the readings joined with '、', or '' when the page has no pinyin
        section.

    The download is retried every 2 seconds until it succeeds.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character itself
        "pinyin": "",  # filled in below when available
    }

    while True:
        try:
            request = urllib.request.Request(url, headers=header)
            # The timeout turns a stalled connection into a retry instead of
            # an indefinite hang.
            reponse = urllib.request.urlopen(request, timeout=30).read()
            web_html = etree.HTML(str(reponse, 'utf-8'))
        except Exception as e:
            print(chinese + 'server error', e)
            time.sleep(2)
        else:
            break

    # Extract the pinyin; a page without that section keeps the '' default.
    try:
        pinyin = web_html.xpath('//div[@id="pinyin"]')[0]
        chinese_details['pinyin'] = '、'.join(pinyin.xpath('span/b/text()'))
    except Exception:
        print("无法获得" + chinese + "的属性:pinyin")

    # Read the value from the dict: a local set only on success would be
    # unbound here whenever the lookup above failed.
    print('读取汉字信息：' + chinese, chinese_details['pinyin'])
    return chinese_details



def craw(windows,chinese,ExcelOutputFile_path):
    """Scrape the hanyu.baidu.com page of ``chinese`` and save its media.

    Besides collecting the text attributes, this downloads the stroke-order
    gif and the pronunciation mp3 into ``ExcelOutputFile_path + chinese + '/'``
    and renders one PNG per stroke step plus one PNG of the full character
    through ``ChangeSVG2png`` / ``ChineseChangeSVG2png``.

    Args:
        windows: Settings object forwarded to the PNG renderers.
        chinese: The character to look up.
        ExcelOutputFile_path: Folder prefix for the gif/mp3 downloads; it is
            concatenated directly, so it should end with '/'.

    Returns:
        A dict with the keys ``chinese``, ``pinyin``, ``stroke_count``,
        ``img_address``, ``mp3_address``, ``zimi``, ``basicmean``, ``zuci``,
        ``synonym``, ``antonym``, ``bishun_svg`` and ``baikemean``. Attributes
        missing from the page keep the value ''. ``bishun_svg`` is a list of
        path strings when found.

    Raises:
        OSError: If an output folder cannot be created.

    The page download is retried every 2 seconds until it succeeds.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character itself
        "pinyin": "",  # pinyin readings
        "stroke_count": "",  # number of strokes
        "img_address": "",  # URL of the stroke-order animation
        "mp3_address": "",  # URL of the pronunciation audio
        "zimi": "",  # riddle
        "basicmean": "",  # basic definition
        "zuci": "",  # related words
        "synonym": "",  # synonyms
        "antonym": "",  # antonyms
        "bishun_svg": "",  # SVG paths of the strokes
        "baikemean": ""  # encyclopedia definition
    }

    # Create the folders before the retry loop: a filesystem error inside the
    # loop would be reported as a "server error" and retried forever.
    os.makedirs(OUTPUTADDRESS + chinese + '/', exist_ok=True)  # PNG folder
    download_dir = ExcelOutputFile_path + chinese + '/'
    os.makedirs(download_dir, exist_ok=True)  # gif/mp3 folder

    while True:
        try:
            request = urllib.request.Request(url, headers=header)
            # The timeout turns a stalled connection into a retry instead of
            # an indefinite hang.
            reponse = urllib.request.urlopen(request, timeout=30).read()
            # Decode once and use the text for both the regexes and the
            # parser; str(bytes) without an encoding yields the "b'...'" repr.
            html = str(reponse, 'utf-8')
            web_html = etree.HTML(html)
        except Exception as e:
            print(chinese + 'server error', e)
            time.sleep(2)
        else:
            break

    # Pinyin
    try:
        pinyin = web_html.xpath('//div[@id="pinyin"]')[0]
        chinese_details['pinyin'] = '、'.join(pinyin.xpath('span/b/text()'))
    except Exception:
        print("无法获得" + chinese + "的属性:pinyin")

    # Stroke count
    try:
        stroke_count = web_html.xpath('//li[@id="stroke_count"]')[0]
        chinese_details['stroke_count'] = stroke_count.xpath('span')[0].text
    except Exception:
        print("无法获得" + chinese + "的属性:stroke_count")

    # Stroke-order animation and pronunciation audio
    try:
        imgs = re.findall(r'data-gif="(.+?\.gif)"', html)
        mp3s = re.findall(r'url="(.+?\.mp3)"', html)

        for img, mp3 in zip(imgs, mp3s):
            imagename = download_dir + chinese + '.gif'
            chinese_details['img_address'] = img
            mp3name = download_dir + chinese + '.mp3'
            chinese_details['mp3_address'] = mp3
            # Download both files; a failure is reported but not fatal.
            try:
                urllib.request.urlretrieve(img, filename=imagename)
                urllib.request.urlretrieve(mp3, filename=mp3name)
            except Exception:
                print(chinese + ' failure')
    except Exception:
        print("无法获得" + chinese + "的属性:img,mp3")

    # Riddle
    try:
        zimi = web_html.xpath('//div[@id="miyu-wrapper"]')[0]
        chinese_details['zimi'] = ' '.join(zimi.xpath('div/p/text()'))
    except Exception:
        print("无法获得" + chinese + "的属性:zimi")

    # Basic definition
    try:
        basicmean = web_html.xpath('//div[@id="basicmean-wrapper"]')[0]
        chinese_details['basicmean'] = '。'.join(basicmean.xpath('div/dl/dd/p/text()'))
    except Exception:
        print("无法获得" + chinese + "的属性:basicmean")

    # Related words (the last link text is dropped)
    try:
        zuci = web_html.xpath('//div[@id="zuci-wrapper"]')[0]
        chinese_details['zuci'] = '、'.join(zuci.xpath('div/a/text()')[0:-1])
    except Exception:
        print("无法获得" + chinese + "的属性:zuci")

    # Synonyms; a missing section is deliberately not reported.
    try:
        synonym = web_html.xpath('//div[@id="synonym"]')[0]
        chinese_details['synonym'] = '、'.join(synonym.xpath('div/a/text()'))
    except Exception:
        pass

    # Antonyms; a missing section is deliberately not reported.
    try:
        antonym = web_html.xpath('//div[@id="antonym"]')[0]
        chinese_details['antonym'] = '、'.join(antonym.xpath('div/a/text()'))
    except Exception:
        pass

    # Stroke SVG paths, rendered to one PNG per step plus the full character
    try:
        bishun = web_html.xpath('//div[@class="word-stroke-wrap"]')[0]
        bishun_svg = [x.get('d') for x in bishun.xpath('./div[1]/div/div/svg/g/path')]
        chinese_details['bishun_svg'] = bishun_svg
        # NOTE(review): the last <path> is left out of the rendering,
        # presumably because it is not a stroke; confirm against the page.
        svgpaths = bishun_svg[0:-1]
        svgpath_perstep = []
        for svgpath in svgpaths:
            svgpath_perstep.append(svgpath)
            ChangeSVG2png(windows, svgpath_perstep, chinese)

        ChineseChangeSVG2png(windows, svgpaths, chinese)
    except Exception:
        print("无法获得" + chinese + "的属性:bishun")

    # Encyclopedia definition
    try:
        baikemean = web_html.xpath('//div[@id="baike-wrapper"]')[0]
        chinese_details['baikemean'] = str(baikemean.xpath('normalize-space(div[2]/p/text())'))
    except Exception:
        print("无法获得" + chinese + "的属性:baikemean")

    # Read the value from the dict: a local set only on success would be
    # unbound here whenever the pinyin lookup failed.
    print('读取汉字信息：' + chinese, chinese_details['pinyin'])
    return chinese_details

def createBiShun(windows,ExcelFile_path,ExcelFile_sheetName,ExcelOutputFile_path):
    """Crawl every character of the workbook and write the results back.

    The characters are read from the column '内容' of the first sheet. Each
    distinct non-blank entry is passed to ``craw`` once and its attributes are
    stored in the matching rows.

    Args:
        windows: Settings object forwarded to ``craw``.
        ExcelFile_path: Path of the ``.xlsx`` workbook; read, then rewritten.
        ExcelFile_sheetName: Name given to the sheet that is written.
        ExcelOutputFile_path: Folder prefix forwarded to ``craw``.
    """
    # sheet_name=0 reads the first sheet whatever its name is.
    df = pd.read_excel(ExcelFile_path, sheet_name=0, keep_default_na=False,
                       engine='openpyxl')

    # (output column, key in the dict returned by craw), in column order.
    column_for_field = (
        ('拼音', 'pinyin'),
        ('笔画数', 'stroke_count'),
        ('笔顺动画地址', 'img_address'),
        ('读音地址', 'mp3_address'),
        ('字谜', 'zimi'),
        ('基础释义', 'basicmean'),
        ('组词', 'zuci'),
        ('近义词', 'synonym'),
        ('反义词', 'antonym'),
        ('笔画SVG路径PATH', 'bishun_svg'),
        ('百科释义', 'baikemean'),
    )

    # dict.fromkeys removes duplicates while keeping first-seen order, so a
    # character listed twice is crawled only once.
    for st in dict.fromkeys(df['内容']):
        if isinstance(st, str) and not st.strip():
            continue  # empty cell: nothing to look up
        chinese = craw(windows,st,ExcelOutputFile_path)
        mask = df['内容'] == st  # every row holding this character
        for column, field in column_for_field:
            value = chinese[field]
            if field == 'bishun_svg':
                # One path per line; skip <path> elements without a 'd' value.
                value = '\n'.join(path for path in value if path)
            df.loc[mask, column] = value

    # NOTE(review): the workbook at ExcelFile_path is rewritten with this
    # single sheet; confirm that dropping any other sheets is intended.
    with pd.ExcelWriter(ExcelFile_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=ExcelFile_sheetName, index=False)

    print("写入完毕")

if __name__ == '__main__':
    # SelectPage destroys the window it receives and builds its own root, so
    # the Tk instance created here is only a throwaway placeholder.
    SelectPage(tk.Tk())

freesaber · 发表于 2023-7-24 16:44

之前自己也搞过，爬百度的汉字笔画gif。后来，爬下来的内容，实际用处不大。娃现在练字，直接用的这个，可以生成+打印的：https://young.hao.360.com/writepaper

xiatongxue · 发表于 2023-7-24 16:10

厉害，等我学会了，我也要出个教程，学习学习。

xinxiu · 发表于 2023-7-24 14:30

厉害了，感谢分享。

SU150228 · 发表于 2023-7-24 14:36

图片是田字格样式就好了

testwind · 发表于 2023-7-24 14:39

学习研究下，也许以后能用上

LAML · 发表于 2023-7-24 14:42

虽然现在用不上，但是不妨碍点赞收藏（收藏癖路过），可以整个米字格的版本

wub88 · 发表于 2023-7-24 14:43

厉害了，感谢分享。

llamb · 发表于 2023-7-24 15:02

好东西收藏一下

鹿鸣 · 发表于 2023-7-24 15:17

这个实现起来这么多代码的，学习一下

额微粒波地 · 发表于 2023-7-24 16:21

赞一个，这个小工具不错

帐号		自动登录	找回密码
密码			注册[Register]

[Python 原创] 获取汉字笔顺并形成笔顺图片

免费评分