求指点从网页获取文字有空格并且隐藏了数字
import requests
from bs4 import BeautifulSoup

BASE_URL = "https://fanqienovel.com"
BOOK_URL = f"{BASE_URL}/page/7356041190682135577?enter_from=stack-room"

# requests has no default timeout; without one a stalled connection hangs forever.
REQUEST_TIMEOUT = 10  # seconds

# NOTE(review): according to the replies in this thread, the site obfuscates
# chapter text with a custom web font, so some characters come back as
# code points that render as blanks. Decoding needs a code-point -> character
# table built from the site's font file; that table is not available here.
# Fill this in (keys are ord() values, as str.translate expects) once known.
# While empty, text passes through unchanged.
GLYPH_MAP = {}


def fetch_soup(url):
    """Download *url* and return it parsed as BeautifulSoup.

    Prints a diagnostic and returns None when the request raises or the
    server answers with anything other than HTTP 200.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"请求异常:{exc}")
        return None
    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return None
    return BeautifulSoup(response.text, "html.parser")


def extract_chapter_text(chapter_soup):
    """Return the chapter body as newline-separated text, or None if absent."""
    text_div = chapter_soup.find("div", {"class": "muye-reader-content noselect"})
    if text_div is None:
        return None
    # Pass "\n" as the separator: get_text(strip=True) alone glues every text
    # fragment together with nothing in between, so there would be no line
    # breaks left for splitlines() to find.
    raw_text = text_div.get_text("\n", strip=True)
    lines = [line for line in raw_text.splitlines() if line.strip()]
    return "\n".join(lines).translate(GLYPH_MAP)


def main():
    """Print every chapter title, link and body text of the book at BOOK_URL."""
    soup = fetch_soup(BOOK_URL)
    if soup is None:
        return

    chapter_div = soup.find("div", {"class": "chapter"})
    if chapter_div is None:
        print("未找到分章节内容")
        return

    chapter_items = chapter_div.find_all("div", {"class": "chapter-item"})
    for index, chapter_item in enumerate(chapter_items, start=1):
        title_link = chapter_item.find("a", {"class": "chapter-item-title"})
        # Skip entries without a usable link instead of crashing on None.
        if title_link is None or not title_link.get("href"):
            continue

        chapter_url = f"{BASE_URL}{title_link['href']}"
        print(f"第{index}章:{title_link.text} - {chapter_url}")

        # Fetch and print the chapter body; a failed chapter must not abort the rest.
        chapter_soup = fetch_soup(chapter_url)
        if chapter_soup is None:
            continue

        chapter_text = extract_chapter_text(chapter_soup)
        if chapter_text is None:
            print("未找到章节内容")
        else:
            print(chapter_text)


if __name__ == "__main__":
    main()
获取的文字被隐藏了,有没有大佬指点一下? 那不是空格就是字符没显示出来,这网站有字体加密的。 https://gitee.com/awuer/ZikiImages/raw/master/1712726895819.png (字体加密)
https://gitee.com/awuer/ZikiImages/raw/master/1712727026820.png 用这个字体解密。 鹿鸣 发表于 2024-4-10 12:36
那不是空格就是字符没显示出来,这网站有字体加密的
大佬如何攻克难题啊
网上查下,番茄或起点小说网站字体解密,顺便学习一把 字体加密 用OCR识别或者手机端的网页 https://www.52pojie.cn/thread-1911970-1-1.html
跟这个原理差不多,学一下思路吧~
页:
[1]