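# 本帖最后由 林夕丶 于 2023-6-9 22:04 编辑  (forum note: post last edited by 林夕丶 on 2023-06-09 22:04)
# [Python] 纯文本查看 复制代码  (forum code-box toolbar text left over from copy/paste; not part of the script)
import requests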
from bs4 import BeautifulSoup
import time
import os
from urllib.parse import urljoin

# HTTP headers sent with every request; a browser-like User-Agent is supplied
# instead of the default python-requests one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}
# Identifier of the novel on the site; it is the path component of the index URL.
ids = "15785"
url = f'http://www.qiuyelou.net/{ids}/'

OUTPUT_DIR = "缓存"    # directory the novel text file is appended to
REQUEST_TIMEOUT = 10   # seconds to wait for the server before giving up on a request
MAX_RETRIES = 3        # attempts per chapter before it is reported as failed
RETRY_DELAY = 1        # seconds to sleep after a failed attempt

# Characters that are path separators or are rejected in file names on common
# platforms; each one is replaced with "_" when building the output file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({c: "_" for c in '\\/:*?"<>|'})


def safe_filename(name):
    """Return *name* stripped of surrounding whitespace and made safe as a file name.

    The title comes from a downloaded page, so path separators and other
    characters that are invalid in file names are replaced with underscores.
    """
    return name.strip().translate(_UNSAFE_FILENAME_CHARS)


def resolve_chapter_url(index_url, href):
    """Return the absolute chapter URL for *href* found on the page at *index_url*.

    Relative links resolve against the index page exactly as plain
    concatenation would; root-relative and absolute links are also handled.
    """
    return urljoin(index_url, href)


def chapter_content_id(chap_url):
    """Return the ``id`` of the content ``<div>`` for the chapter at *chap_url*.

    The id is the literal ``content`` followed by the page's file name
    without its ``.html`` suffix.
    """
    page_name = chap_url.split("/")[-1].replace(".html", "")
    return f"content{page_name}"


def fetch_soup(page_url, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return it parsed with BeautifulSoup.

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            and non-success HTTP status codes.
    """
    response = requests.get(page_url, headers=headers, timeout=timeout)
    # Turn 4xx/5xx answers into exceptions so callers retry instead of
    # parsing an error page.
    response.raise_for_status()
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, 'html.parser')


def page_heading(soup):
    """Return the text of the ``<h1>`` inside ``<div class="title">``, or None if absent."""
    title_div = soup.find('div', class_='title')
    if title_div is None or title_div.h1 is None:
        return None
    return title_div.h1.text


def download_chapter(chap_url, out_path, retries=MAX_RETRIES, delay=RETRY_DELAY):
    """Fetch one chapter and append its title and text to *out_path*.

    Request failures are retried up to *retries* times, sleeping *delay*
    seconds after each failure. A page that lacks the expected title or
    content element is treated as a failure without further attempts.

    Returns:
        True if the chapter was written, False otherwise.
    """
    for _ in range(retries):
        try:
            chapter_soup = fetch_soup(chap_url)
        except requests.exceptions.RequestException:
            print('请求失败,重试...')
            time.sleep(delay)
            continue

        chapter_title = page_heading(chapter_soup)
        content_div = chapter_soup.find('div', id=chapter_content_id(chap_url))
        if chapter_title is None or content_div is None:
            # The page was fetched but does not have the expected layout;
            # report failure instead of crashing the whole download.
            return False

        with open(out_path, 'a', encoding='utf-8') as f:
            f.write(chapter_title + '\n\n')
            f.write(content_div.text + '\n\n')
        print('已下载:', chapter_title)
        return True
    return False


def main():
    """Download every chapter linked from the novel index page into one text file."""
    soup = fetch_soup(url)
    title = page_heading(soup)
    if title is None:
        raise SystemExit(f'无法解析小说标题: {url}')
    print('正在下载小说:', title)

    # Only anchors that actually carry an href can be followed.
    chapter_urls = [
        resolve_chapter_url(url, link["href"])
        for link in soup.select('dd a[href]')
    ]
    print(chapter_urls)

    # The output file is opened in append mode, which does not create
    # missing parent directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_path = os.path.join(OUTPUT_DIR, f"{safe_filename(title)}.txt")

    for chap_url in chapter_urls:
        if not download_chapter(chap_url, out_path):
            print('无法下载章节:', chap_url)
    print('小说下载完成!')


if __name__ == "__main__":
    main()