A while back I got hooked on listening to audiobooks on this site. It has no app, and playback keeps cutting out when I listen in a mobile browser, so I wanted to download the episodes to listen offline. The site doesn't offer downloads, so I wrote a script to grab them automatically.
Here is the code (Python 3).
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import json
import os
import time
headers_page = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'}
mycookie = "JSESSIONID=25E3BA1D4F62829B476BFD4E87C22CD2; Hm_lvt_ac3da4632dc24e9d361235e3b2d3a131=1576650664; Hm_lpvt_ac3da4632dc24e9d361235e3b2d3a131=1576657499; ting55_history=https%3A%2F%2Fting55.com%2Fbook%2F13679-5%2560%25E5%25BA%2586%25E4%25BD%2599%25E5%25B9%25B4%25EF%25BC%2588%25E9%2597%25AB%25E9%2581%2593%25E4%25B9%258B%25EF%25BC%2589%25E6%259C%2589%25E5%25A3%25B0%25E5%25B0%258F%25E8%25AF%25B4%25E7%25AC%25AC5%25E7%25AB%25A0"
page = 8
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
    'cookie': mycookie,
    'Host': 'ting55.com',
    'Origin': 'https://ting55.com',
    'Referer': 'https://ting55.com/book/13679-1',
    'Sec-Fetch-Site': 'same-origin',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'xt': 'keywords'  # placeholder; overwritten with the per-chapter token below
}
# Change this id to the book you want to listen to. For example, 庆余年 opens
# at https://ting55.com/book/13679, so 13679 is its id.
book_id = 13679
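# (Optional) If it's easier to paste the whole book URL, the id is just the
# last path segment. A convenience sketch, not part of the original script:
# book_url = "https://ting55.com/book/13679"
# book_id = int(book_url.rstrip("/").rsplit("/", 1)[-1])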
# The script creates a folder named after book_id in the current directory
# and downloads the audio files into it.
if not os.path.exists(str(book_id)):
    os.makedirs(str(book_id))
# Chapters to download, from start_chapter through end_chapter (inclusive)
start_chapter = 1
end_chapter = 20
for i in range(start_chapter, end_chapter + 1):
    # First GET the chapter page
    the_get = requests.get("https://ting55.com/book/%d-%d" % (book_id, i), headers=headers_page)
    soup = BeautifulSoup(the_get.text, 'html.parser')
    # print(soup.prettify())
    # The per-chapter token is carried in a <meta name="_c"> tag
    keywords = soup.find(attrs={"name": "_c"})['content']
    # Update the Referer and xt headers for the POST below
    headers['Referer'] = 'https://ting55.com/book/%d-%d' % (book_id, i)
    headers['xt'] = keywords
    # The link often fails to come back (it happens on the website itself too),
    # so keep retrying until it succeeds.
    while True:
        resp = requests.post("https://ting55.com/glink", data={"bookId": book_id, "isPay": 0, "page": i}, headers=headers)
        dic = json.loads(resp.text)
        down_url = dic.get("ourl", None)
        if down_url and len(down_url) > 10:
            r = requests.get(down_url)
            with open("./%s/%d.m4a" % (book_id, i), "wb") as f:
                f.write(r.content)
            # The site rate-limits you: browse too fast and it blocks access
            # for a while. Uncomment this to pause between chapters.
            # for j in range(100):
            #     time.sleep(1)
            #     print("sleep %d" % (j,))
            break
        else:
            print("%d url error, retrying" % (i,))
            time.sleep(5)  # back off a little before asking again
    print(i)
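A possible refinement: r.content buffers the whole audio file in memory before writing, and a bare retry loop will hammer the server whenever a link never shows up, which is exactly what trips the rate limit. Below is a minimal sketch of a safer download helper with streamed writes, a skip for chapters you already have, and bounded retries with exponential backoff. The name download_chapter and its parameters are my own invention, not anything from the site or the script above.

import os
import time
import requests

def download_chapter(url, dest, max_tries=5, headers=None):
    # Skip chapters that were already downloaded on an earlier run
    if os.path.exists(dest) and os.path.getsize(dest) > 0:
        print("skip %s (already downloaded)" % dest)
        return True
    tmp = dest + ".part"
    delay = 5
    for attempt in range(max_tries):
        try:
            with requests.get(url, stream=True, timeout=30, headers=headers) as r:
                r.raise_for_status()
                with open(tmp, "wb") as f:
                    # Write in 64 KiB chunks instead of holding the whole
                    # file in memory
                    for chunk in r.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
            os.replace(tmp, dest)  # only expose the file once it is complete
            return True
        except requests.RequestException as e:
            print("attempt %d failed: %s" % (attempt + 1, e))
            time.sleep(delay)
            delay *= 2  # back off harder each time to respect the rate limit
    return False

With this helper, the r = requests.get(down_url) block in the main loop would become a single call like download_chapter(down_url, "./%s/%d.m4a" % (book_id, i)), plus a check on the return value.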