"""Merge the <body> of every page under ./add/ into index.html as new.htm.

Usage: move the child .htm files into an ``add`` folder next to
``index.html`` and run this script from that directory.  The output is
``index.html`` with its closing ``</body>...</html>`` removed, followed by
the body content of each child page (in ascending file-name order), followed
by a fresh ``</body></html>`` suffix.
"""
import os
import re

# Everything from the first closing </body> through the last </html>.
# ``*`` (not ``+``) so that an adjacent "</body></html>" is stripped too.
CLOSING_RE = re.compile(r'</body>[\s\S]*</html>')

# Content between the opening <body ...> tag (attributes allowed) and the
# last </body>.  Greedy on purpose, matching the original behaviour.
BODY_RE = re.compile(r'<body(?:\s[^>]*)?>([\s\S]*)</body>')


def read_text(path):
    """Return the UTF-8 decoded content of *path*, closing the file."""
    with open(path, encoding='utf-8') as fh:
        return fh.read()


def strip_closing_tags(html):
    """Return *html* without its trailing ``</body> ... </html>`` part."""
    return CLOSING_RE.sub('', html)


def extract_body(html):
    """Return the text inside ``<body>...</body>``, or None if there is none."""
    found = BODY_RE.search(html)
    return found.group(1) if found else None


def merge_pages(index_path='index.html', add_dir='./add/', out_path='new.htm'):
    """Write *out_path* from *index_path* plus the bodies found in *add_dir*.

    Entries in *add_dir* that are not regular files, or that contain no
    ``<body>`` element, are skipped with a notice instead of aborting the
    whole run.  The output file is overwritten, so running the script twice
    does not duplicate the document.

    Returns the number of child pages that were merged.
    """
    # Start with the index page minus its closing tags.
    pieces = [strip_closing_tags(read_text(index_path))]

    merged = 0
    # Plain lexicographic order, as before ("10.htm" sorts before "2.htm").
    for name in sorted(os.listdir(add_dir)):
        child_path = os.path.join(add_dir, name)
        if not os.path.isfile(child_path):
            print('skipped (not a file): ' + name)
            continue
        body = extract_body(read_text(child_path))
        if body is None:
            print('skipped (no <body> found): ' + name)
            continue
        pieces.append(' ' + body + '\n')
        merged += 1

    # Finally add the closing suffix back.
    pieces.append(' </body>\n</html>')

    # Single write in 'w' mode: a stale new.htm is replaced, not appended to.
    with open(out_path, 'w', encoding='utf-8') as out:
        out.write(''.join(pieces))
    return merged


if __name__ == '__main__':
    merge_pages()
    print('处理完成')