解决python下的编码问题
python下不知道什么时候就会报编码错误,走一波,试试效果如何。
import os
import codecs
# 读取文本文件函数,支持bom-utf-8,utf-8,utf-16,gbk,gb2312
# 返回文件内容
def ReadTextFile(filepath):
try:
file = open(filepath, 'rb')
except IOError as err:
print('读取文件出错 in ReadFile', err)
bytes = file.read()
file.close()
if bytes[:3] == codecs.BOM_UTF8:
content = bytes.decode('utf-8')
else:
try:
content = bytes.decode('gb2312')
except UnicodeDecodeError as err:
try:
content = bytes.decode('utf-16')
except UnicodeDecodeError as err:
try:
content = bytes.decode('utf-8')
except UnicodeDecodeError as err:
try:
content = bytes.decode('gbk')
except UnicodeDecodeError as err:
content = ''
print('不支持此种类型的文本文件编码', err)
return content
你这。。。如果打开的时候报错的话,打开的错误被处理了,但是后面的read还是会报错,因为你没有拿到文件句柄,还有打开文件用with吧 你这根本就是弟弟操作呀
with open("your_file", 'rb') as fp:
file_data = fp.read()
result = chardet.detect(file_data)
file_content = file_data.decode(encoding=result['encoding'])
页:
[1]