本帖最后由 cick 于 2024-5-15 16:51 编辑
[Python]
import os
import platform
import subprocess
import requests
import shutil
import PIL.Image as pimg
from lxml import etree
import PySimpleGUI as sg
def mk_tmp():
    '''
    Create an empty ``temp`` folder in the current working directory.

    If ``temp`` already exists (for example, left over from a previous
    run), it is deleted together with its contents and created again.

    :raises OSError: if the folder cannot be created.
    '''
    try:
        os.mkdir('temp')
    except FileExistsError:
        # Only "already exists" takes the wipe-and-recreate path; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        shutil.rmtree('temp', True)
        os.mkdir('temp')
def main_process(file_id, pic_index=0):
    '''
    Download the page images of a document into the ``temp`` folder.

    Pages are requested one at a time, starting at ``pic_index``, and each
    response body is written to ``./temp/<index>.png``. The first 404
    response is treated as the end of the document.

    The request headers are read from the module-level ``headers`` dict.

    :param file_id: identifier substituted into the online-reader URL.
    :param pic_index: index of the first page to request (default 0).
    :raises requests.HTTPError: if the server answers with an error status
        other than 404.
    '''
    mk_tmp()
    while True:
        db_web = f'https://hbba.sacinfo.org.cn/hbba_onlineRead_page/{file_id}/{pic_index}.png'
        # A timeout keeps a stalled connection from hanging the download.
        r = requests.get(db_web, headers=headers, timeout=30)
        print(f'page:{pic_index},status:{r.status_code}')
        if r.status_code == 404:
            print('complete!')
            break
        # Any other error status (403, 500, ...) must stop the download:
        # saving the error body as a page and requesting the next index
        # would loop for as long as the server keeps failing.
        r.raise_for_status()
        with open(f'./temp/{pic_index}.png', 'wb') as f:
            f.write(r.content)
        pic_index += 1
def get_filename(file_id):
    '''
    Fetch the document title that is used as the output file name.

    Requests the detail page for ``file_id``, takes the text of the first
    ``<h4>`` element, strips surrounding whitespace, prints it and returns
    it. The request headers are read from the module-level ``headers`` dict.

    :param file_id: identifier substituted into the detail-page URL.
    :return: the stripped title text.
    :raises ValueError: if the page contains no ``<h4>`` text.
    '''
    name_web = f'https://hbba.sacinfo.org.cn/stdDetail/{file_id}'
    r = requests.get(name_web, headers=headers, timeout=30)
    html_element = etree.HTML(r.text)
    xpath_filename = '//h4/text()'
    titles = html_element.xpath(xpath_filename)
    if not titles:
        # An unknown file ID yields a page without a title; report that
        # clearly instead of failing with a bare IndexError.
        raise ValueError(f'no title found on the detail page for file ID {file_id!r}')
    # strip() also removes surrounding spaces, which would otherwise end up
    # in the PDF file name.
    filename = titles[0].strip()
    print(filename)
    return filename
def open_folder(folder_path):
    '''
    Open ``folder_path`` in the file browser of the current platform.

    Uses ``explorer`` on Windows, ``open`` on macOS (Darwin) and
    ``xdg-open`` on every other system.
    '''
    launchers = {"Windows": 'explorer', "Darwin": 'open'}
    # Anything that is not Windows or macOS falls back to xdg-open.
    command = launchers.get(platform.system(), 'xdg-open')
    subprocess.Popen([command, folder_path])
if __name__ == '__main__':
    # Kept at module level on purpose: get_filename() and main_process()
    # read ``headers`` as a global.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46'
    }
    sg.theme('SystemDefault')
    layout = [
        [sg.Text('Enter File ID:'), sg.InputText(key='file_id')],
        [sg.Button('Download'), sg.Button('Exit')]
    ]
    window = sg.Window('Download Tool', layout)
    while True:
        event, values = window.read()
        if event == sg.WINDOW_CLOSED or event == 'Exit':
            break
        if event != 'Download':
            continue
        file_id = values['file_id'].strip()
        if not file_id:
            # Let the user correct the input instead of requesting a bad URL.
            sg.popup_error('Please enter a file ID.')
            continue
        filename = get_filename(file_id)
        main_process(file_id)
        # main_process() numbers the pages consecutively from 0, so the
        # number of files in ./temp is also the number of pages.
        page_count = len(os.listdir('./temp'))
        if page_count == 0:
            # Without this guard, sources[0] below raises IndexError.
            sg.popup_error('No pages were downloaded; check the file ID.')
            continue
        sources = [pimg.open(f'./temp/{i}.png') for i in range(page_count)]
        try:
            # The first image is the base document; the rest are appended as
            # further pages.
            sources[0].save(f'./{filename}.pdf', 'pdf', save_all=True, append_images=sources[1:])
        finally:
            # Release the file handles on the page images.
            for image in sources:
                image.close()
        sg.popup('Download completed!')
        open_folder(os.path.abspath('.'))
        break
    window.close()
因为我是 mac 环境,所以改成用命令行来执行,还是不太方便。然后 GPT 修改之后的代码如下:
[Python]
import os
import requests
import shutil
import PIL.Image as pimg
from lxml import etree
def mk_tmp():
    '''
    Create an empty ``temp`` folder in the current working directory.

    An existing ``temp`` folder is removed with everything inside it and
    then created again, so each run starts from an empty folder.

    :raises OSError: if the folder cannot be created.
    '''
    try:
        os.mkdir('temp')
    except FileExistsError:
        # Catch only the "already exists" case; a bare ``except`` would
        # also intercept KeyboardInterrupt and SystemExit.
        shutil.rmtree('temp', True)
        os.mkdir('temp')
def main_process(file_id, pic_index=0):
    '''
    Download the page images of a document into the ``temp`` folder.

    Starting at ``pic_index``, each page is requested in turn and its
    response body is saved as ``./temp/<index>.png``. A 404 response ends
    the download.

    The request headers are read from the module-level ``headers`` dict.

    :param file_id: identifier substituted into the online-reader URL.
    :param pic_index: index of the first page to request (default 0).
    :raises requests.HTTPError: if the server answers with an error status
        other than 404.
    '''
    mk_tmp()
    while True:
        db_web = f'https://hbba.sacinfo.org.cn/hbba_onlineRead_page/{file_id}/{pic_index}.png'
        # Without a timeout a stalled connection blocks the script forever.
        r = requests.get(db_web, headers=headers, timeout=30)
        print(f'page:{pic_index},status:{r.status_code}')
        if r.status_code == 404:
            print('complete!')
            break
        # Stop on every other error status: writing an error body to disk as
        # a page and moving on to the next index would never terminate while
        # the server keeps returning that error.
        r.raise_for_status()
        with open(f'./temp/{pic_index}.png', 'wb') as f:
            f.write(r.content)
        pic_index += 1
def get_filename(file_id):
    '''
    Fetch the document title that is used as the output file name.

    Requests the detail page for ``file_id``, takes the text of the first
    ``<h4>`` element, strips surrounding whitespace, prints it and returns
    it. The request headers are read from the module-level ``headers`` dict.

    :param file_id: identifier substituted into the detail-page URL.
    :return: the stripped title text.
    :raises ValueError: if the page contains no ``<h4>`` text.
    '''
    name_web = f'https://hbba.sacinfo.org.cn/stdDetail/{file_id}'
    r = requests.get(name_web, headers=headers, timeout=30)
    html_element = etree.HTML(r.text)
    xpath_filename = '//h4/text()'
    matches = html_element.xpath(xpath_filename)
    if not matches:
        # Fail with a clear message when the page has no title.
        raise ValueError(f'no title found on the detail page for file ID {file_id!r}')
    # Index 0 selects the first <h4> text node; strip() also drops
    # surrounding spaces so they do not end up in the PDF file name.
    filename = matches[0].strip()
    print(filename)
    return filename
if __name__ == '__main__':
    file_id = input("Enter the file ID: ").strip()  # Take file ID input from the user
    # Kept at module level on purpose: get_filename() and main_process()
    # read ``headers`` as a global.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46'
    }
    filename = get_filename(file_id)
    main_process(file_id)
    # main_process() numbers the pages consecutively from 0, so the number
    # of files in ./temp is also the number of pages.
    page_count = len(os.listdir('./temp'))
    if page_count == 0:
        # Without this guard, sources[0] below raises IndexError.
        raise SystemExit('No pages were downloaded; check the file ID.')
    sources = [pimg.open(f'./temp/{i}.png') for i in range(page_count)]
    try:
        # The first image is the base document; the rest are appended as
        # further pages.
        sources[0].save(f'./{filename}.pdf', 'pdf', save_all=True, append_images=sources[1:])
    finally:
        # Release the file handles on the page images.
        for image in sources:
            image.close()
然后直接输入文章对应的 file_id 就可以下载了,前提是本机有 Python 环境。打包好的版本我一会儿试试。
第二版,改了下,直接有弹窗
效果如下
这是第二种修改方式,效果如下:
|