本帖最后由 花朝朝暮暮开 于 2021-6-23 09:37 编辑
用起来总是不太顺手:偶尔有些图片转不了
但是将就能用,各位看看有什么补充的
用途:把通过这个帖子( https://www.52pojie.cn/thread-1451908-1-1.html )获取的文章里的图片本地化(转成 base64 内嵌到文件中)
[Python] 纯文本查看 复制代码
from base64 import b64encode
from re import findall
from requests import get
from os.path import exists
from os import makedirs,getcwd,listdir
from time import sleep
def download_img_and_replaceb64(img_src, article):
    """Download ``img_src`` and inline it into ``article`` as a base64 data URI.

    Every occurrence of ``img_src`` in ``article`` is replaced with
    ``data:<mime>;base64,<payload>``.

    Args:
        img_src: URL of the image to download.
        article: Document text that contains ``img_src``.

    Returns:
        The article text with the URL replaced. If the download fails
        (network error, timeout, or a non-success HTTP status) the article is
        returned unchanged so the original link keeps working.
    """
    # Imported locally so this function does not need extra file-level imports.
    from requests.exceptions import RequestException

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    }
    try:
        # Without a timeout a stalled server would hang the whole batch.
        res = get(img_src, headers=headers, timeout=30)
    except RequestException as err:
        print("download failed, keeping original URL: %s (%s)" % (img_src, err))
        return article
    if not res.ok:
        # Do not embed an HTML error page as if it were image data.
        print("HTTP %s, keeping original URL: %s" % (res.status_code, img_src))
        return article

    # Prefer the MIME type reported by the server; fall back to the extension.
    content_type = res.headers.get('Content-Type', '').split(';')[0].strip().lower()
    if content_type.startswith('image/'):
        mime = content_type
    else:
        # Original file name, with any query string / fragment removed.
        picname = img_src.split('/')[-1].split('?')[0].split('#')[0]
        # With no extension at all, "png" is only a placeholder subtype.
        ext = picname.rsplit('.', 1)[-1].lower() if '.' in picname else 'png'
        # Map common extensions to their registered MIME subtypes.
        mime = 'image/' + {'jpg': 'jpeg', 'svg': 'svg+xml'}.get(ext, ext)

    # b64encode returns ASCII-only bytes; decode instead of slicing the repr.
    b64 = b64encode(res.content).decode('ascii')
    # Complete data URI, e.g. data:image/png;base64,....
    base64body = 'data:' + mime + ';base64,' + b64
    # Swap the image URL for its base64 form.
    return article.replace(img_src, base64body)
def replaceMDimg(article,title):
    """Inline every remote ``<img>`` of ``article`` and save the result.

    Each ``<img ...>`` tag that contains an http(s) URL is passed to
    ``download_img_and_replaceb64``; the resulting text is written to
    ``./complete/<title>.md`` (UTF-8) and the title is printed.

    Args:
        article: Full text of the document.
        title: Output file name without the ``.md`` suffix.
    """
    handled = set()
    for tag in findall(r'<img.*?>', article):
        # Images that are already inlined need no conversion.
        if 'data:image' in tag:
            continue
        # URL runs up to the first whitespace, quote, "?" or the closing ">".
        # NOTE(review): cutting at "?" drops the query string before the
        # download and leaves it behind after the replaced URL — this matches
        # the original behaviour; confirm it is intended.
        urls = findall(r'http.*?(?=[\s"\'?>])', tag)
        if not urls:
            # Local/relative image: nothing to download, leave the tag as is.
            continue
        url = urls[0]
        # One replacement rewrites every occurrence, so download each URL once.
        if url in handled:
            continue
        handled.add(url)
        # Core conversion step.
        article = download_img_and_replaceb64(url, article)
    # Make sure the output folder exists even when called on its own.
    makedirs('./complete', exist_ok=True)
    with open('./complete/' + title + ".md", mode="w", encoding="utf-8") as f:
        f.write(article)
    print("《%s》\n" % title)
def get_md_str(md):
    """Return the entire content of the file at path ``md``, read as UTF-8 text."""
    with open(md, "r", encoding="utf-8") as md_file:
        return md_file.read()
def get_all_article():
    """List the ``.md`` entries of the current working directory.

    Prints each match as a numbered line (starting at 1) and returns the
    matching names in the order ``listdir()`` produced them.
    """
    all_md = [entry for entry in listdir() if entry.endswith('.md')]
    for index, name in enumerate(all_md, start=1):
        print(str(index) + '、' + name)
    return all_md
if __name__ == '__main__':
    # Folder that receives the converted Markdown files. A relative path is
    # used so it works on every OS and matches the './complete/' prefix the
    # writer uses; exist_ok makes repeated runs a no-op.
    addr = './complete'
    makedirs(addr, exist_ok=True)
    # Collect every Markdown file that sits next to the script's working dir.
    allmd = get_all_article()
    # Convert the images of each file in turn; md[:-3] strips the ".md" suffix.
    for md in allmd:
        article = get_md_str(md)
        replaceMDimg(article, md[:-3])