# 本帖最后由 miracle1989 于 2024-1-20 12:51 编辑
# 代码中已添加延迟,还是会提示这个,请问如何解决呢?
# [Python] 纯文本查看 复制代码
import requests
import json
import os.path
import random
import time
# Shared Session object so cookies are kept across requests.
session = requests.Session()
# Path of the file where the cookies are stored.
cookiesPath = "./cookies.txt"
def login():
    """Log in with the hard-coded credentials and save the session cookies.

    Sends a POST request to the login endpoint through the module-level
    ``session``, prints the response body decoded as UTF-8, then writes the
    session's cookies as a JSON object to ``cookiesPath``.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = "https://XXX/user/login"
    # Login form fields: e-mail, password and state flag.
    data = {
        "email": "xxxx@qq.com",
        "password": "xxxxxx",
        "state": True,
    }
    response = session.post(url=url, data=data)
    # Read the whole body at once. Iterating iter_content() with its default
    # chunk_size of 1 yields single bytes, and joining them with += is
    # needlessly slow.
    text = response.content.decode("utf-8")
    print(text)
    # Convert the session's cookie jar to a plain dict and persist it.
    cookie_dict = requests.utils.dict_from_cookiejar(session.cookies)
    with open(cookiesPath, "w") as f:
        json.dump(cookie_dict, f, ensure_ascii=False)
def init():
    """Prepare the shared session, reusing saved cookies when available.

    If the cookie file at ``cookiesPath`` exists, its JSON content is loaded
    into the module-level ``session``; otherwise ``login()`` is called.
    """
    if os.path.isfile(cookiesPath):
        # Cookie file present: restore the cookies instead of logging in.
        with open(cookiesPath, "r") as cookie_file:
            saved_cookies = json.load(cookie_file)
        session.cookies = requests.utils.cookiejar_from_dict(saved_cookies)
        return
    # No cookie file yet: perform a login.
    login()
def getChapterContent(chapterId, chapterName):
    """Fetch one chapter and append its text to the book's output file.

    The output path is built from the module-level ``book_name``, which must
    be assigned before this function is called.

    Args:
        chapterId: Chapter identifier interpolated into the request URL.
        chapterName: Chapter title. Accepted for the caller's convenience but
            not used by this function.
    """
    # Every backslash must be doubled: a bare "\x" starts a hex escape, so
    # "\xxxx" is rejected by the parser as a truncated \xXX escape.
    file_name = f"E:\\Ebook\\any\\xxxx\\{book_name}.txt"
    url = f"https://XXX/book/getChapterContent/{chapterId}"
    response = session.get(url)
    if response.status_code != 200:
        # Report the failure instead of silently skipping the chapter.
        print(f"请求失败,状态码:{response.status_code}")
        return
    data = response.json()
    with open(file_name, "a", encoding="utf-8") as file:
        for chapter in data["data"]["content"]:
            # Strip spaces from each text fragment and write it on its own line.
            content_without_spaces = chapter["text"].replace(" ", "")
            file.write(content_without_spaces + "\n")
def getChapter(bookId):
    """Download every chapter of a book, pausing randomly between requests.

    Requests the chapter list (page 1, up to 2000 entries) for ``bookId``
    through the module-level ``session``. On a 200 response, each listed
    chapter is passed to ``getChapterContent`` and then reported on stdout.
    Any other status code ends the function without output.

    Args:
        bookId: Book identifier sent as the ``id`` query parameter.
    """
    # Random pause of 1 to 3 seconds before requesting the chapter list.
    time.sleep(random.uniform(1, 3))
    response = session.get(
        url="https://XXX/book/chapter",
        params={"id": bookId, "page": 1, "size": 2000},
    )
    if response.status_code != 200:
        return
    chapter_list = response.json()["data"]
    for entry in chapter_list:
        # Random pause of 0.5 to 6 seconds between chapter downloads.
        time.sleep(random.uniform(0.5, 6))
        chapter_id, chapter_title = entry["id"], entry["title"]
        getChapterContent(chapter_id, chapter_title)
        print(f"当前处理的章节ID:{chapter_id}, 章节标题:{chapter_title}")
def search_books(keyword):
    """Search for a book by keyword and return the first match.

    Args:
        keyword: Search text, sent as the ``keyword`` query parameter.

    Returns:
        A ``(book_name, book_id)`` tuple of ``(str, int)`` for the first
        search result, or ``None`` when the HTTP request fails, the API
        reports a non-200 ``code``, or the result list is empty.
    """
    url = "https://XXX/book/search"
    # Let requests build the query string so reserved characters in the
    # keyword (&, #, +, spaces) are percent-encoded instead of corrupting it.
    response = session.get(url, params={"keyword": keyword})
    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return None
    data = response.json()
    # The payload carries its own status in the "code" field.
    if data["code"] != 200:
        print("搜索错误:", data["msg"])
        return None
    # Only the first result is used; tolerate a null or empty result list.
    first_book = next(iter(data["data"] or []), None)
    if first_book is None:
        print("搜索结果为空")
        return None
    return str(first_book["book_name"]), int(first_book["id"])
if __name__ == '__main__':
    # Remove any cookie file left by a previous run; init() then finds no
    # file and performs a fresh login.
    if os.path.exists(cookiesPath):
        os.remove(cookiesPath)
    init()
    # Replace the literal below with input(...) to ask for the keyword
    # interactively.
    keyword = "小说名称"
    result = search_books(keyword)
    if result is None:
        # search_books() returns None on failure or when nothing matched;
        # unpacking it directly would raise a TypeError.
        raise SystemExit("未找到书籍,无法继续下载")
    # book_name must remain a module-level name: getChapterContent() reads it
    # to build the output file path.
    book_name, book_id = result
    # Download all chapters of the selected book.
    getChapter(book_id)