我没有用过这个格式，不过比如你下面这一大段：
# Extract the text of every ofd:TextObject on one page and append it,
# space-separated, to ofdtxt/09.txt under the current working directory.
with open(xml_path10, "r", encoding="utf-8") as f:
    _text10 = f.read()
tree10 = xmltodict.parse(_text10)

text_objects10 = tree10['ofd:Page']['ofd:Content']['ofd:Layer']['ofd:TextObject']
# xmltodict returns a single dict (not a list) when the element occurs once.
if not isinstance(text_objects10, list):
    text_objects10 = [text_objects10]

# Walk the objects that are actually present instead of a hard-coded count,
# so the loop neither raises IndexError nor depends on the page size.
words10 = []
for row10 in text_objects10:
    eachword10 = row10['ofd:TextCode'].get('#text')
    # Kept from the original: data_dict10 ends up holding the last text seen.
    data_dict10['ofd:TextCode'] = eachword10
    if eachword10 is not None:  # a TextCode without text has nothing to write
        words10.append(eachword10)

# Open the output once rather than once per word.
if words10:
    with open(os.getcwd() + '/ofdtxt/09.txt', "a") as f:
        f.writelines(' ' + word for word in words10)
像这样重复的类似代码，不能用循环包起来吗？比如：
# For pages 1..35, extract the text of every ofd:TextObject in the page's
# Content.xml and append it, space-separated, to ofdtxt/<page number>.txt
# under the current working directory.
# NOTE(review): the hand-written per-page code wrote to a zero-padded name
# ('09.txt'); this loop writes '<num>.txt' - confirm which naming is expected.
for num in range(1, 36):
    xml_path = f"{file_path}/Doc_0/Pages/Page_{num}/Content.xml"
    try:
        with open(xml_path, "r", encoding="utf-8") as f:
            _text = f.read()
        tree = xmltodict.parse(_text)

        text_objects = tree['ofd:Page']['ofd:Content']['ofd:Layer']['ofd:TextObject']
        # xmltodict returns a single dict (not a list) when the element
        # occurs only once on the page.
        if not isinstance(text_objects, list):
            text_objects = [text_objects]

        # Iterate over what the page really contains; the previous fixed
        # range(0, 29939) only stopped by raising IndexError.
        words = []
        for row10 in text_objects:
            eachword = row10['ofd:TextCode'].get('#text')
            # Kept from the original: data_dict10 holds the last text seen.
            data_dict10['ofd:TextCode'] = eachword
            if eachword is not None:  # nothing to write for an empty TextCode
                words.append(eachword)

        # Open the output once per page rather than once per word.
        if words:
            with open(os.getcwd() + f'/ofdtxt/{num}.txt', "a") as f:
                f.writelines(' ' + word for word in words)
    except Exception as err:
        # Still best-effort per page (a bad page must not stop the others),
        # but report the failure instead of hiding it.
        print(f"Page_{num}: skipped ({err!r})")