本帖最后由 dreamrise 于 2021-6-10 08:57 编辑
[Python] 纯文本查看 复制代码
# -*- coding: utf-8 -*-
import pdfplumber
import pandas as pd
def _export_pages(pdf, page_indices, out_path):
    """Stack the tables found on the given pages and write them to Excel.

    For each page, the first extracted row is used as the column header and
    the remaining rows as data. The per-page frames are concatenated in page
    order (row indices are kept as-is, so they restart on every page) and the
    result is written to ``out_path``.

    Args:
        pdf: An open pdfplumber PDF object.
        page_indices: Iterable of 0-based positions in ``pdf.pages``.
        out_path: Path of the ``.xlsx`` file to write.

    Returns:
        The combined DataFrame that was written.

    Raises:
        IndexError: If a page index is beyond the end of the document.
    """
    frames = []
    for page_index in page_indices:
        table = pdf.pages[page_index].extract_table()
        print(table)
        if not table:
            # extract_table() yields None when no table is detected on the
            # page; slicing it would raise TypeError, so skip such pages.
            continue
        table_df = pd.DataFrame(table[1:], columns=table[0])
        print(table_df)
        frames.append(table_df)
    # Concatenate once at the end instead of re-copying the accumulated frame
    # on every page; fall back to an empty frame if no page had a table.
    full_df = pd.concat(frames) if frames else pd.DataFrame()
    full_df.to_excel(out_path)
    return full_df


# The context manager closes the PDF file handle even if extraction fails.
with pdfplumber.open("2020.pdf") as pdf:
    # Each entry: (output workbook, 0-based page positions to export).
    for out_path, page_indices in (
        ('小学段.xlsx', range(1, 6)),
        ('初中段.xlsx', range(6, 11)),
        ('高中段.xlsx', range(11, 15)),
    ):
        full_df = _export_pages(pdf, page_indices, out_path)
下载链接:
链接: https://pan.baidu.com/s/1k7N1-kTb8e9eBiA7IppBPA 提取码: di3t
未编译成 exe 文件,懒。