本帖最后由 mingxin123 于 2022-8-3 09:04 编辑
超级简单爬取笔趣阁小说的Python代码,只需要一个Python环境(并安装 requests 和 lxml 两个库)就能运行
技术栈:requests,xpath(lxml)
直接上代码
import os
import requests
from lxml import etree
def download_txt(name):
    """Download every chapter page of the novel ``name`` as raw HTML files.

    Searches www.1biqug.com for ``name``, follows the first search hit to the
    book's chapter index, then saves each chapter page unchanged as
    ``<name>/<chapter title>.html`` relative to the current directory.

    Args:
        name: Search keyword; also used as the output directory name.

    Raises:
        IndexError: If the search page contains no matching book link.
    """
    host = "https://www.1biqug.com"
    # requests has no default timeout; without one a stalled server would
    # block the whole download forever.
    timeout = 30
    # Characters that are illegal in file names on Windows (and "/" on every
    # platform). The original only removed "*", so titles containing e.g.
    # "?" or "/" made open() fail.
    forbidden = str.maketrans("", "", '\\/:*?"<>|\r\n\t')

    resp = requests.get(
        host + "/searchbook.php", params={"keyword": name}, timeout=timeout
    )
    search_page = etree.HTML(resp.content.decode())
    book_links = search_page.xpath("//li/span[@class='s2']/a/@href")
    if not book_links:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that tells the user what actually went wrong.
        raise IndexError("no search result for {!r}".format(name))

    detail_url = host + book_links[0]
    resp = requests.get(detail_url, timeout=timeout)
    index_page = etree.HTML(resp.content.decode())
    ret_list = index_page.xpath("//div[@id='list']//dd//a/@href")
    print(ret_list)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./{}".format(name), exist_ok=True)

    # The first 12 links are skipped, exactly as in the original script --
    # presumably a "latest chapters" teaser that duplicates entries further
    # down the index (TODO confirm against the live page).
    for ret in ret_list[12:]:
        url = host + ret
        resp = requests.get(url, timeout=timeout)
        info = resp.content.decode()
        titles = etree.HTML(info).xpath("//h1/text()")
        if not titles:
            # Without a title there is no file name to save under; skip the
            # page instead of aborting the whole download.
            print("跳过(未找到标题):", url)
            continue
        print(titles[0])
        # Sanitise only the file name, never the directory part of the path.
        path = os.path.join(name, titles[0].translate(forbidden) + ".html")
        with open(path, "w", encoding="utf8") as f:
            f.write(info)
    print(name, "下载完成了")
if __name__ == "__main__":
    # Script entry point: prompt for the novel title, then download it.
    download_txt(input("请输入小说名"))
附上运行成功截图
更新更新
有朋友说要把格式转成txt格式,这里更新一版结果是txt格式的代码,还是直接复制过去就可以用。
import os
import requests
from lxml import etree
def download_txt(name):
    """Download every chapter of the novel ``name`` as plain-text files.

    Searches www.1biqug.com for ``name``, follows the first search hit to the
    book's chapter index, then writes the text nodes of each chapter's
    ``<div id="content">`` to ``<name>/<chapter title>.txt`` relative to the
    current directory, one text node per CRLF-terminated line.

    Args:
        name: Search keyword; also used as the output directory name.

    Raises:
        IndexError: If the search page contains no matching book link.
    """
    host = "https://www.1biqug.com"
    # requests has no default timeout; without one a stalled server would
    # block the whole download forever.
    timeout = 30
    # Characters that are illegal in file names on Windows (and "/" on every
    # platform). The original only removed "*", so titles containing e.g.
    # "?" or "/" made open() fail.
    forbidden = str.maketrans("", "", '\\/:*?"<>|\r\n\t')

    resp = requests.get(
        host + "/searchbook.php", params={"keyword": name}, timeout=timeout
    )
    search_page = etree.HTML(resp.content.decode())
    book_links = search_page.xpath("//li/span[@class='s2']/a/@href")
    if not book_links:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that tells the user what actually went wrong.
        raise IndexError("no search result for {!r}".format(name))

    detail_url = host + book_links[0]
    resp = requests.get(detail_url, timeout=timeout)
    index_page = etree.HTML(resp.content.decode())
    ret_list = index_page.xpath("//div[@id='list']//dd//a/@href")
    print(ret_list)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./{}".format(name), exist_ok=True)

    # The first 12 links are skipped, exactly as in the original script --
    # presumably a "latest chapters" teaser that duplicates entries further
    # down the index (TODO confirm against the live page).
    for ret in ret_list[12:]:
        url = host + ret
        resp = requests.get(url, timeout=timeout)
        chapter_page = etree.HTML(resp.content.decode())
        titles = chapter_page.xpath("//h1/text()")
        if not titles:
            # Without a title there is no file name to save under; skip the
            # page instead of aborting the whole download.
            print("跳过(未找到标题):", url)
            continue
        # Sanitise only the file name, never the directory part of the path.
        path = os.path.join(name, titles[0].translate(forbidden) + ".txt")
        content = chapter_page.xpath("//div[@id='content']//text()")
        # The original tried to delete a stale file before appending, but it
        # formatted the title *list* into the path, so the check never
        # matched and re-running duplicated every chapter. Opening in "w"
        # mode truncates the old file directly.
        # newline="" disables newline translation so the explicit "\r\n"
        # is written as-is instead of becoming "\r\r\n" on Windows.
        with open(path, "w", encoding="utf-8", newline="") as f_content:
            for con in content:
                # Everything from the first site-injected script/ad marker
                # onwards is not part of the chapter text.
                if "chaptererror();" in con or "本站最新域名:" in con:
                    break
                f_content.write(con + "\r\n")
        print(titles[0])
    print(name, "下载完成了")
if __name__ == "__main__":
    # Script entry point: prompt for the novel title, then download it.
    download_txt(input("请输入小说名:"))
附上运行成功截图
正文内容
|