使用Python爬虫一键爬取XX阁小说
本帖最后由 mingxin123 于 2022-8-3 09:04 编辑

#### 超级简单爬取笔趣阁小说的Python代码,只需要一个Python环境(并已安装 requests 和 lxml)就能运行
##### 技术栈:requests,xpath
直接上代码
```python
import os
import requests
from lxml import etree
def download_txt(name):
    """Search the site for a novel and save each chapter page as an HTML file.

    The first search hit for ``name`` is taken as the book. Every chapter
    link found on the book's index page is fetched and the raw page is
    written to ``<name>/<chapter title>.html``.

    Args:
        name: Novel title to search for; also used as the output directory.

    Returns:
        None. Progress is reported with ``print``.
    """
    host = "https://www.1biqug.com"
    resp = requests.get(host + "/searchbook.php", params={"keyword": name})
    search_page = etree.HTML(resp.content.decode())
    book_links = search_page.xpath("//li/span[@class='s2']/a/@href")
    # xpath() returns a list; an empty list means the search found nothing.
    if not book_links:
        print(name, "没有找到")
        return

    # Use the first search hit (concatenating the list itself raises TypeError).
    detail_url = host + book_links[0]
    resp = requests.get(detail_url)
    index_page = etree.HTML(resp.content.decode())
    chapter_links = index_page.xpath("//div[@id='list']//dd//a/@href")
    print(chapter_links)

    os.makedirs(name, exist_ok=True)

    # Characters that are not allowed in file names on common filesystems.
    forbidden = str.maketrans("", "", '\\/:*?"<>|')
    for index, href in enumerate(chapter_links, start=1):
        resp = requests.get(host + href)
        info = resp.content.decode()
        chapter_page = etree.HTML(info)
        titles = chapter_page.xpath("//h1/text()")
        raw_title = titles[0] if titles else ""
        # Fall back to a numbered name when the page has no usable <h1> text.
        title = raw_title.translate(forbidden).strip() or "chapter_{}".format(index)
        print(title)
        path = os.path.join(name, title + ".html")
        with open(path, 'w', encoding="utf8") as f:
            f.write(info)
    print(name, "下载完成了")
if __name__ == '__main__':
    # Script entry point: read the novel title from stdin and download it.
    download_txt(input("请输入小说名"))
```
附上运行成功截图
##### 更新更新
##### 有朋友说要把格式转成txt格式,这里更新一版结果是txt格式的代码,还是直接复制过去就可以用。
```python
import os
import requests
from lxml import etree
def download_txt(name):
    """Search the site for a novel and save each chapter as a text file.

    The first search hit for ``name`` is taken as the book. For every
    chapter link on the book's index page, the text nodes under
    ``div#content`` are written to ``<name>/<chapter title>.txt``, one node
    per line, stopping at the first node that contains site boilerplate.

    Args:
        name: Novel title to search for; also used as the output directory.

    Returns:
        None. Progress is reported with ``print``.
    """
    host = "https://www.1biqug.com"
    resp = requests.get(host + "/searchbook.php", params={"keyword": name})
    search_page = etree.HTML(resp.content.decode())
    book_links = search_page.xpath("//li/span[@class='s2']/a/@href")
    # xpath() returns a list; an empty list means the search found nothing.
    if not book_links:
        print(name, "没有找到")
        return

    # Use the first search hit (concatenating the list itself raises TypeError).
    detail_url = host + book_links[0]
    resp = requests.get(detail_url)
    index_page = etree.HTML(resp.content.decode())
    chapter_links = index_page.xpath("//div[@id='list']//dd//a/@href")
    print(chapter_links)

    os.makedirs(name, exist_ok=True)

    # Characters that are not allowed in file names on common filesystems.
    forbidden = str.maketrans("", "", '\\/:*?"<>|')
    for index, href in enumerate(chapter_links, start=1):
        resp = requests.get(host + href)
        chapter_page = etree.HTML(resp.content.decode())
        titles = chapter_page.xpath("//h1/text()")
        raw_title = titles[0] if titles else ""
        # Fall back to a numbered name when the page has no usable <h1> text.
        title = raw_title.translate(forbidden).strip() or "chapter_{}".format(index)
        path = os.path.join(name, title + ".txt")

        lines = []
        for text in chapter_page.xpath("//div[@id='content']//text()"):
            # Everything from the first boilerplate marker onwards is dropped.
            if "chaptererror();" in text or "本站最新域名:" in text:
                break
            lines.append(text + "\r\n")

        # "w" truncates any file left by an earlier run, so reruns do not
        # append duplicate text. newline="" writes "\r\n" exactly as given
        # instead of letting Windows expand it to "\r\r\n".
        with open(path, "w", encoding="utf-8", newline="") as f_content:
            f_content.writelines(lines)
        print(title)
    print(name, "下载完成了")
if __name__ == '__main__':
    # Script entry point: read the novel title from stdin and download it.
    download_txt(input("请输入小说名:"))
```
附上运行成功截图
wenxin2150 发表于 2022-8-2 15:53
你这是直接将小说每一章节的网页下载下来吗?我还以为你是把小说内容爬取出来
比较懒哈哈哈
感谢分享
向楼主学习!
想问下能否替换其他网站呢,意思是把某阁换成其他网站。
大佬做个成品吧
哈哈辛苦
谢谢分享!好人一生平安!
再合成一个txt文件感觉更好
感谢分享
好家伙再做一个合并功能就好了
kyle233 发表于 2022-8-2 15:40
感谢楼主,可以转成TXT格式吗
可以,我今天再更新一下