# [Python] View as plain text / copy code
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
import base64
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
import os
import time
from tqdm import tqdm
# Encryption / decryption helper
class Crypter:
    """AES-CBC helper that maps text to Base64 ciphertext and back.

    Args:
        key: Raw AES key bytes. When omitted, the built-in key
            (``DEFAULT_KEY_B64`` decoded from Base64) is used.
        iv: Raw CBC initialisation vector bytes. When omitted, the built-in
            IV (``DEFAULT_IV_B64`` decoded from Base64) is used.

    Attributes:
        key: The AES key in use, as bytes.
        iv: The CBC initialisation vector in use, as bytes.
    """

    # Base64-encoded defaults; these are the values the script has always used.
    DEFAULT_KEY_B64 = 'le95G3hnFDJsBE+1/v9eYw=='
    DEFAULT_IV_B64 = 'IvswQFEUdKYf+d1wKpYLTg=='

    def __init__(self, key=None, iv=None):
        self.key = base64.b64decode(self.DEFAULT_KEY_B64) if key is None else key
        self.iv = base64.b64decode(self.DEFAULT_IV_B64) if iv is None else iv

    def _new_cipher(self):
        """Return a fresh AES-CBC cipher; one is created for every operation."""
        return AES.new(self.key, AES.MODE_CBC, self.iv)

    def encrypt(self, data):
        """Encrypt the string *data* and return Base64 text.

        The text is UTF-8 encoded and padded to ``AES.block_size`` before
        encryption.
        """
        padded = pad(data.encode('utf-8'), AES.block_size)
        ciphertext = self._new_cipher().encrypt(padded)
        return base64.b64encode(ciphertext).decode('utf-8')

    def decrypt(self, ciphertext):
        """Decrypt Base64 *ciphertext* and return the UTF-8 decoded plain text.

        Errors raised by Base64 decoding, unpadding or UTF-8 decoding are
        propagated to the caller unchanged.
        """
        raw = base64.b64decode(ciphertext)
        plain = unpad(self._new_cipher().decrypt(raw), AES.block_size)
        return plain.decode('utf-8')
# Resolve the mp3 links of one chapter-list page
def get_mp3_url(url, timeout=30):
    """Return ``(title, mp3_url)`` pairs for the chapter links on a page.

    Every ``<a>`` element that carries both a ``title`` and an ``onclick``
    attribute is treated as a chapter link. The chapter id is the text
    between the first pair of single quotes in ``onclick``. For each id the
    ``ting-with-ef`` API is queried and the ``ef`` field of its JSON reply is
    decrypted with ``Crypter`` and appended to the media host.

    The module-level ``headers`` dict is sent with every request.

    Args:
        url: Address of the chapter-list page to scan.
        timeout: Seconds to wait on each HTTP request. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        A list of ``(title, mp3_url)`` tuples in page order. Anchors with an
        empty title are skipped.
    """
    api_base = 'http://www.yuetingba.cn/api/app/docs-listen'
    media_host = 'http://117.65.51.119:50010'
    crypter = Crypter()  # reused for every chapter on the page
    mp3_url_list = []
    # A single session lets the many per-chapter API calls reuse connections.
    with requests.Session() as session:
        page = session.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(page.text, 'html.parser')
        for anchor in soup.find_all('a', title=True, onclick=True):
            title = anchor['title']
            if not title:
                continue
            doc_id = anchor['onclick'].split("'")[1]
            api_url = f'{api_base}/{doc_id}/ting-with-ef'
            payload = session.get(api_url, headers=headers, timeout=timeout).json()
            mp3_url_list.append((title, media_host + crypter.decrypt(payload['ef'])))
    return mp3_url_list
# 下载mp3文件
def download_mp3(mp3_url_list):
with ThreadPoolExecutor(max_workers=None) as executor:
futures = []
for title, mp3_url in mp3_url_list:
# print(title,mp3_url)
filename = f'资治通鉴(白话文)/{title}.mp3'
if os.path.exists(filename):
print(f'{title}.mp3 已存在')
continue
future = executor.submit(download_file, mp3_url, filename)
futures.append(future)
# 使用tqdm显示进度条
for future in tqdm(futures):
try:
future.result()
except Exception as e:
print(f'下载失败: {e}')
# Helper that downloads a single file
def download_file(url, filename, timeout=60, chunk_size=64 * 1024):
    """Download *url* and store it as *filename*.

    The module-level ``headers`` dict is sent with the request.

    Args:
        url: Address of the file to fetch.
        filename: Destination path; its parent directory is created if
            missing.
        timeout: Seconds to wait on the HTTP request.
        chunk_size: Number of bytes read from the response per write.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the destination cannot be written.
    """
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Data is written to a side file and renamed only after a complete
    # download, so an aborted transfer never leaves a file that a later run
    # would treat as already downloaded.
    partial = f'{filename}.part'
    try:
        with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
            # Fail instead of saving an HTTP error page under an .mp3 name.
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        os.replace(partial, filename)
    finally:
        # Only present when the download did not complete.
        if os.path.exists(partial):
            os.remove(partial)
    print(f'{filename} 下载完成')
# Main program
if __name__ == '__main__':
    # Request headers; read as a module-level global by the functions above.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0',
    }
    base_url = 'http://www.yuetingba.cn/book/detail/3a0a4394-004b-58cc-bee6-fd01e7787da4/0'
    response = requests.get(base_url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each link inside the tab bar points to one chapter-list page.
    tab_bar = soup.find('ul', class_='nav nav-tabs')
    if tab_bar is None:
        raise SystemExit('未找到分页导航 <ul class="nav nav-tabs">')
    nav_tabs = tab_bar.find_all('a')

    def _process_tab(tab_url):
        """Resolve the mp3 links of one chapter-list page and download them."""
        download_mp3(get_mp3_url(tab_url))

    # Scraping and downloading both run inside the worker, so the slow
    # per-chapter link lookup of one page overlaps with the other pages
    # instead of running serially in the main thread.
    with ThreadPoolExecutor() as executor:
        futures = {}
        for a in nav_tabs:
            url = 'http://www.yuetingba.cn' + a['href']
            futures[executor.submit(_process_tab, url)] = url
        # Surface worker exceptions instead of dropping them silently.
        for future, url in futures.items():
            try:
                future.result()
            except Exception as e:
                print(f'下载失败: {url}: {e}')
# Note: even with multithreading, the download is still slow.