因为工作需要,看了坛友写的一些代码,主要是从百度上得到,但是跟手头的工作不太协调,所以自己写了一个:
所需的库(Pillow、requests、beautifulsoup4)请自行安装。
[Python] 纯文本查看 复制代码
from PIL import Image
import requests
from bs4 import BeautifulSoup
import os
def split_grid_rows(image_path, grid_cols=6, grid_rows=6,spacing=3):
    """Slice a grid-style image into square cells.

    The cell side length is derived from the image width only, and the same
    value is used for the cell height, so cells are always square.

    Args:
        image_path: Path of the image file to open with Pillow.
        grid_cols: Number of cells per row.
        grid_rows: Number of rows to cut.
        spacing: Gap between neighbouring cells, in pixels.

    Returns:
        A list with ``grid_rows`` entries; each entry is a list of
        ``grid_cols`` cropped images, ordered left to right.
    """
    def convert_image_gray(image_path):
        """Overwrite the file at ``image_path`` with its grayscale version."""
        Image.open(image_path).convert('L').save(image_path)

    # Optional preprocessing step, disabled by default:
    # convert_image_gray(image_path)

    image = Image.open(image_path)
    width = image.size[0]

    # Remove the gaps between columns, share the rest between the columns,
    # then shave one more pixel off each cell (the -1 applies after the
    # division; presumably it compensates for a border line - not verifiable
    # from this code alone).
    usable_width = width - (grid_cols - 1) * spacing
    cell_side = usable_width // grid_cols - 1

    # Distance between the top-left corners of two neighbouring cells.
    # The same pitch is used horizontally and vertically.
    pitch = cell_side + spacing + 1

    def crop_grid_cell(row, col):
        """Return the cell at (row, col) as a cropped image."""
        left = col * pitch
        top = row * pitch
        return image.crop((left, top, left + cell_side, top + cell_side))

    return [
        [crop_grid_cell(row, col) for col in range(grid_cols)]
        for row in range(grid_rows)
    ]
def calculate_rows(Nums, per_row=6):
    """Return the number of rows needed to lay out ``Nums`` items.

    Args:
        Nums: Total number of items to place.
        per_row: How many items fit in one row. Defaults to 6.

    Returns:
        The number of full rows, plus one extra row when there is a
        partially filled last row (i.e. the ceiling of ``Nums / per_row``).

    Raises:
        ValueError: If ``per_row`` is not positive.
    """
    if per_row <= 0:
        raise ValueError("per_row must be a positive number")
    full_rows, leftover = divmod(Nums, per_row)
    # A non-empty remainder means one more, partially filled, row.
    return full_rows + 1 if leftover > 0 else full_rows
def split_image(img_path='image.png',g_cols=6,g_rows=1,spacing=3,total_bihua=0):
    """Cut a grid image into cells and save them next to the source image.

    Cells are numbered row by row starting at 1 and written as
    ``<n>.png`` into the directory that contains ``img_path``.

    Args:
        img_path: Path of the grid image to split.
        g_cols: Number of cells per row, forwarded to ``split_grid_rows``.
        g_rows: Number of rows, forwarded to ``split_grid_rows``.
        spacing: Gap between cells in pixels, forwarded to
            ``split_grid_rows``.
        total_bihua: Exclusive upper bound for the cell counter. Saving
            stops once the *next* cell number reaches this value, so cells
            ``1 .. total_bihua - 1`` are written (always at least one).

    Returns:
        True when saving stopped because ``total_bihua`` was reached;
        None when every cell of the grid was saved before reaching it.
    """
    # dirname() is '' for a bare file name; fall back to the current
    # directory instead of slicing the file name itself.
    out_dir = os.path.dirname(img_path) or '.'

    # Forward the grid geometry so the caller's settings are honoured.
    grid_cells = split_grid_rows(
        img_path, grid_cols=g_cols, grid_rows=g_rows, spacing=spacing
    )

    i = 1
    for row_cells in grid_cells:
        for cell in row_cells:
            cell.save(os.path.join(out_dir, f'{i}.png'))
            i += 1
            if i >= total_bihua:
                return True
def query_Chinese_characters(characters):
    """Download and split the stroke-order sheet of each character.

    For every character the stroke-order page is fetched, the stroke sheet
    image is downloaded, and the sheet is cut into single-stroke images.
    Characters whose page or image cannot be obtained are skipped.

    Args:
        characters: Iterable of single characters (e.g. a string).
    """
    query_server_host = "https://zidian.yw11.com"
    query_server_path = "/bishun"
    for c in characters:
        result = download_html(query_server_host + query_server_path + "/" + c)
        # download_html may return nothing when the sheet is missing;
        # unpacking that directly would raise TypeError.
        if not result:
            continue
        image_number, image_short_src = result
        if not image_short_src:
            continue

        image_short_full_src = query_server_host + image_short_src
        ret_file_path = download_image(image_short_full_src, c)
        print(ret_file_path)
        if not ret_file_path:
            continue

        grows = calculate_rows(image_number + 1)
        print(image_number, grows)
        # To also keep the final, complete character, raise total_bihua
        # to image_number + 1 + 1.
        split_image(ret_file_path, g_cols=6, g_rows=grows, spacing=3,
                    total_bihua=image_number + 1)
def download_html(url, timeout=10):
    """Fetch a stroke-order page and extract stroke count and sheet URL.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        A ``(number, src)`` tuple. ``number`` is the integer text of the
        last ``<i>`` tag on the page (the stroke count) and ``src`` is the
        ``src`` attribute of the ``<img class="biImgBox">`` element.
        ``src`` is None when that image is absent; both values are None
        when the request fails or no stroke count can be parsed.
    """
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        print(f"Request failed, status code: {res.status_code}")
        return (None, None)

    doc = BeautifulSoup(res.text, 'html.parser')

    # The text of the last <i> tag carries the stroke count.
    i_tags = doc.find_all('i')
    try:
        number = int(i_tags[-1].get_text())
    except (IndexError, ValueError):
        print("Stroke count not found in the page.")
        return (None, None)

    # The stroke-order sheet image.
    query_image = doc.find('img', class_="biImgBox")
    if not query_image:
        print("Image with class 'biImgBox' not found.")
        # Keep the tuple shape so callers can always unpack the result.
        return (number, None)
    return (number, query_image.get('src'))
def download_image(url,character_c, timeout=10):
    """Download an image into ``characters/<character_c>/``.

    The file is named ``<character_c>.<ext>`` where ``<ext>`` is the
    extension of the URL's path component.

    Args:
        url: Address of the image.
        character_c: Character used for both the folder and file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the written file, or False when the server did not
        answer with status 200.
    """
    from urllib.parse import urlsplit

    print(url)
    # The context manager releases the streamed connection on every path,
    # including the early return on a failed request.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"请求失败,状态码: {response.status_code}")
            return False

        # Take the extension from the path only, so a query string or a
        # dot in the host name cannot end up in the file name.
        get_type = os.path.splitext(urlsplit(url).path)[1].lstrip('.')
        if not get_type:
            # NOTE(review): fallback is a guess; confirm the server's format.
            get_type = 'png'

        dir_path = 'characters/' + character_c
        os.makedirs(dir_path, exist_ok=True)
        # Joined with '/' on purpose: the result is later split on '/'.
        file_path = dir_path + "/" + character_c + "." + get_type

        # Stream the body to disk in chunks.
        with open(file_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    return file_path
if __name__ == '__main__':
    # Demo run: process every character of the sample sentence.
    query_Chinese_characters("打开文件以二进制写模式写入内容")
这个是一张图:
以下这个是按笔划分割的单张图:
|