好友
阅读权限 10
听众
最后登录 1970-1-1
本帖最后由 858043016 于 2020-2-20 12:22 编辑
1.获取书名和目录
网页分别是
名称
https://lib-nuanxin.wqxuetang.com/v1/book/initbook?bid=3206295
目录
https://lib-nuanxin.wqxuetang.com/v1/book/catatree?bid=3206295
返回格式是 JSON
2.pymupdf
生成PDF以及添加目录
3.代码
3.1只生成目录,结合FreePic2PDF使用,已经生成utf-16文件了,复制整个文件就可以使用
[Python] 纯文本查看 复制代码
import requests as req
import json
import os
def get_cata(book_id):
    """Fetch a book's table of contents as tab-indented bookmark text.

    Posts to the ``catatree`` endpoint with the module-level ``headers`` and
    renders every entry as "label, tab, page number" on its own line. Each
    nesting level below the top adds one leading tab.

    Args:
        book_id: Book identifier substituted into the ``bid`` query parameter.

    Returns:
        The bookmark text as one string (empty when there are no entries).
    """
    url_name = "https://lib-nuanxin.wqxuetang.com/v1/book/catatree?bid={}".format(book_id)
    response = req.post(url=url_name, headers=headers)
    book_cata = json.loads(response.text)

    lines = []

    def walk(nodes, depth):
        # Recurse through the whole tree so entries nested deeper than two
        # levels are kept; a missing or null child list is treated as empty.
        for node in nodes or []:
            lines.append('\t' * depth + str(node['label']) + '\t' + str(node['pnum']) + '\n')
            walk(node.get('children'), depth + 1)

    walk(book_cata['data'], 0)
    return ''.join(lines)
# Read the Cookie header value from Cookies.txt in the working directory.
with open('Cookies.txt', 'r') as f:
    # Strip surrounding whitespace: a trailing newline left by an editor is
    # not a legal HTTP header value and makes the request fail.
    cookies = f.read().strip()

# Request headers sent with every API call.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': cookies,
    'Host': 'lib-nuanxin.wqxuetang.com',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}

book_id = input('输入书的id:\n')

# Write the bookmark text as UTF-16 for use with FreePic2Pdf.
with open('FreePic2Pdf_bkmk.txt', 'w', encoding='utf-16') as f:
    # The return value of write() is not needed; the original stored it in
    # ``cookies`` and thereby overwrote the cookie string.
    f.write(get_cata(book_id))
3.2生成PDF和目录
[Python] 纯文本查看 复制代码
import fitz
import requests as req
import json
import os
import img2pdf
def get_name(book_id):
    """Return ``(name, pages)`` for a book, as reported by the ``initbook`` endpoint.

    The request is a POST that uses the module-level ``headers``.
    """
    endpoint = "https://lib-nuanxin.wqxuetang.com/v1/book/initbook?bid={}".format(book_id)
    reply = req.post(url=endpoint, headers=headers)
    payload = json.loads(reply.text)
    title = payload['data']['name']
    page_count = payload['data']['pages']
    return (title, page_count)
def get_cata(book_id):
    """Fetch a book's table of contents as ``[level, title, page]`` rows.

    Posts to the ``catatree`` endpoint with the module-level ``headers`` and
    flattens the returned tree in document order (each entry is followed by
    its descendants). The result is what ``pic2pdf`` passes to ``doc.setToC``.

    Args:
        book_id: Book identifier substituted into the ``bid`` query parameter.

    Returns:
        A list of ``[int level, label, int page]`` lists.
    """
    url_name = "https://lib-nuanxin.wqxuetang.com/v1/book/catatree?bid={}".format(book_id)
    response = req.post(url=url_name, headers=headers)
    book_cata = json.loads(response.text)

    cata = []

    def collect(nodes):
        # Recurse through the whole tree so entries nested deeper than two
        # levels are kept; a missing or null child list is treated as empty.
        for node in nodes or []:
            cata.append([int(node['level']), node['label'], int(node['pnum'])])
            collect(node.get('children'))

    collect(book_cata['data'])
    return cata
def pic2pdf(book_id):
    """Merge the numbered page images in ``path_raw`` into a bookmarked PDF.

    Reads the module-level ``path_raw`` folder, combines its images (sorted by
    the number in the file name) into a temporary PDF under ``temp``, then
    adds the table of contents from ``get_cata`` and saves the result as
    ``<name>_<book_id>_P<pages>.pdf`` inside ``path_raw``.

    Args:
        book_id: Book identifier used to look up the name, page count and
            table of contents.
    """
    name, page_all = get_name(book_id)
    print("开始进行{}_{},一共{}页".format(book_id, name, page_all))

    pdf_name = "{}_{}_P{}.pdf".format(name, book_id, page_all)
    temp_dir = os.path.join(path_raw, "temp")
    path_tem = os.path.join(temp_dir, pdf_name)
    path_final = os.path.join(path_raw, pdf_name)

    # Keep only files whose name (minus a 4-character extension) is a number.
    # The temp folder or a PDF left by an earlier run would otherwise make the
    # numeric sort below raise ValueError.
    imgList = [
        entry for entry in os.listdir(path_raw)
        if entry[:-4].isdigit() and os.path.isfile(os.path.join(path_raw, entry))
    ]
    print("获取了{}页图片".format(len(imgList)))
    imgList.sort(key=lambda x: int(x[:-4]))
    img_all = [os.path.join(path_raw, img) for img in imgList]
    pfn_bytes = img2pdf.convert(img_all, with_pdfrw=False)

    # Make sure the temporary directory exists.
    os.makedirs(temp_dir, exist_ok=True)
    with open(path_tem, "wb") as f:
        f.write(pfn_bytes)
    print("准备生成目录...")

    # Add the table of contents.
    try:
        doc = fitz.open(path_tem)
        try:
            toc = get_cata(book_id)
            doc.setToC(toc)
            doc.save(path_final)  # save the final PDF
        finally:
            # Release the file handle even when adding the TOC fails.
            doc.close()
    except Exception as err:
        print("添加目录错误,检查页码是否正确")
        print(err)
        return

    print("保存成功{}".format(path_final))
    os.remove(path_tem)
    try:
        # temp is a directory, so it needs rmdir; os.remove raises on it.
        os.rmdir(temp_dir)
    except OSError:
        # Best-effort cleanup: leave the folder if it still holds other files.
        pass
# Load the Cookie header value from Cookies.txt in the working directory.
with open('Cookies.txt', 'r') as f:
    # Strip surrounding whitespace: a trailing newline left by an editor is
    # not a legal HTTP header value and makes the request fail.
    cookies = f.read().strip()

# Request headers sent with every API call.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': cookies,
    'Host': 'lib-nuanxin.wqxuetang.com',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}

book_id = input('输入书的id:\n')
# pic2pdf reads this module-level folder path.
path_raw = input('输入放着图片的文件夹的全部路径:\n')
pic2pdf(book_id)
3.3 20200220更新,不需要cookies了,代码直接复制就能用
[Python] 纯文本查看 复制代码
import json
import os
import fitz
import img2pdf
import requests as req
def get_name(book_id):
    """Return ``(name, pages)`` for a book, as reported by the ``initbook`` endpoint.

    The request is a POST that uses the module-level ``headers``.
    """
    endpoint = 'https://www.wqxuetang.com/v1/book/initbook?bid={}'.format(book_id)
    reply = req.post(url=endpoint, headers=headers)
    payload = json.loads(reply.text)
    title = payload['data']['name']
    page_count = payload['data']['pages']
    return (title, page_count)
def get_cata(book_id):
    """Fetch a book's table of contents as ``[level, title, page]`` rows.

    Posts to the ``catatree`` endpoint with the module-level ``headers`` and
    flattens the returned tree in document order (each entry is followed by
    its descendants). The result is what this script passes to ``doc.setToC``.

    Args:
        book_id: Book identifier substituted into the ``bid`` query parameter.

    Returns:
        A list of ``[int level, label, int page]`` lists.
    """
    url_name = 'https://www.wqxuetang.com/v1/book/catatree?bid={}'.format(book_id)
    response = req.post(url=url_name, headers=headers)
    book_cata = json.loads(response.text)

    cata = []

    def collect(nodes):
        # Recurse through the whole tree so entries nested deeper than two
        # levels are kept; a missing or null child list is treated as empty.
        for node in nodes or []:
            cata.append([int(node['level']), node['label'], int(node['pnum'])])
            collect(node.get('children'))

    collect(book_cata['data'])
    return cata
# Request headers sent with every API call.
# The header names and values originally contained stray spaces
# ('User - Agent', 'keep - alive', 'q = 0.9'), which are not valid HTTP field
# names, so the intended headers were never actually sent.
headers = {
    'Accept': 'application/json,text/plain,*/*',
    # 'br' is left out: requests can only decode brotli when an optional
    # package is installed, and an undecoded body would break json.loads.
    'Accept-Encoding': 'gzip,deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'BA': 'bapkg/com.bookask.wqxuetang,baver/0.0.1',
    'Connection': 'keep-alive',
    'Cookie': '',
    'Host': 'www.wqxuetang.com',
    'Referer': 'https://www.wqxuetang.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}
def pic2pdf(book_id):
    """Merge the numbered page images in ``path_raw`` into a bookmarked PDF.

    Reads the module-level ``path_raw`` folder, combines its images (sorted by
    the number in the file name) into a temporary PDF under ``temp``, then
    adds the table of contents from ``get_cata`` and saves the result as
    ``<name>_<book_id>_P<pages>.pdf`` inside ``path_raw``.

    Args:
        book_id: Book identifier used to look up the name, page count and
            table of contents.
    """
    name, page_all = get_name(book_id)
    print("开始进行{}_{},一共{}页".format(book_id, name, page_all))

    pdf_name = "{}_{}_P{}.pdf".format(name, book_id, page_all)
    temp_dir = os.path.join(path_raw, "temp")
    path_tem = os.path.join(temp_dir, pdf_name)
    path_final = os.path.join(path_raw, pdf_name)

    # Keep only files whose name (minus a 4-character extension) is a number.
    # The temp folder or a PDF left by an earlier run would otherwise make the
    # numeric sort below raise ValueError.
    imgList = [
        entry for entry in os.listdir(path_raw)
        if entry[:-4].isdigit() and os.path.isfile(os.path.join(path_raw, entry))
    ]
    print("获取了{}页图片".format(len(imgList)))
    imgList.sort(key=lambda x: int(x[:-4]))
    img_all = [os.path.join(path_raw, img) for img in imgList]
    pfn_bytes = img2pdf.convert(img_all, with_pdfrw=False)

    # Make sure the temporary directory exists.
    os.makedirs(temp_dir, exist_ok=True)
    with open(path_tem, "wb") as f:
        f.write(pfn_bytes)
    print("准备生成目录...")

    # Add the table of contents.
    try:
        doc = fitz.open(path_tem)
        try:
            toc = get_cata(book_id)
            doc.setToC(toc)
            doc.save(path_final)  # save the final PDF
        finally:
            # Release the file handle even when adding the TOC fails.
            doc.close()
    except Exception as err:
        print("添加目录错误,检查页码是否正确")
        print(err)
        return

    print("保存成功{}".format(path_final))
    os.remove(path_tem)
# Entry point: "1" adds a table of contents to an existing PDF; anything else
# builds the PDF from images and then adds the table of contents.
flag = input("单独添加目录输入1\n")
if flag == '1':
    print("-进入单独添加目录-")
    try:
        book_id = input('输入书的id:\n')
        path = input("输入文件目录包括pdf名称")
        toc = get_cata(book_id)
        # PyMuPDF rejects a plain save() onto the file it currently has open,
        # so write to a sibling file and swap it in once the document is closed.
        path_new = path + ".tmp"
        doc = fitz.open(path)
        try:
            doc.setToC(toc)
            doc.save(path_new)  # save the PDF with the new table of contents
        finally:
            doc.close()
        os.replace(path_new, path)
        print("添加成功{}".format(path))
    except Exception as err:
        print("添加目录失败")
        # Show the underlying reason instead of silently swallowing it.
        print(err)
else:
    print("-进入PDF合成及目录添加-")
    book_id = input('输入书的id:\n')
    # pic2pdf reads this module-level folder path.
    path_raw = input('输入放着图片的文件夹的全部路径:\n')
    pic2pdf(book_id)
4.参考文献
https://zhuanlan.zhihu.com/p/88618967
https://www.jb51.net/article/160622.htm
点个热心吧
5.软件下载地址
https://www.52pojie.cn/thread-1103527-1-1.html
免费评分
查看全部评分