"""
Qishu (奇书网) novel scraper.
"""
import pinyin
import requests
import parsel
import os
# Category names the script accepts; the value typed by the user is checked
# for membership in this set before crawling.
NOVEL_CLASS = {
    '穿越',
    '言情',
    '都市',
    '百合',
    '历史',
    '同人',
    '武侠',
    '玄幻',
    '惊悚',
    '科幻',
    '网游',
    '哲学',
    '资料',
}
def start(type_name='穿越', page_count=1):
    """Download the TXT file of every novel listed under one category.

    For each listing page of the category, the novel detail links are
    collected; each detail page is fetched, its TXT download link is
    located, and the file is saved into the directory ``f"{type_name}小说"``
    relative to the current working directory.

    Args:
        type_name: Category name. It is converted to pinyin (no tones, no
            delimiter) to build the listing URL.
        page_count: Number of listing pages to crawl, starting at page 1.
            The default of 1 matches the previously hard-coded range.

    Raises:
        requests.RequestException: If a category listing page cannot be
            fetched. Failures on individual novels are reported and skipped.
    """
    base_url = 'http://www.vbiquge.co'
    # Seconds. Without a timeout, requests may block forever on a stalled
    # connection.
    timeout = 30
    out_dir = f"{type_name}小说"

    print(f"开始爬取{type_name}小说")
    # exist_ok=True so that re-running the script does not crash on the
    # directory created by a previous run.
    os.makedirs(out_dir, exist_ok=True)

    # The slug does not depend on the page index, so compute it once.
    category_slug = pinyin.get(type_name, format='strip', delimiter="")

    for index in range(1, page_count + 1):
        url = base_url + '/xclass/' + category_slug + '/' + str(index)
        req = requests.get(url=url, timeout=timeout)
        req.encoding = 'utf-8'
        selector = parsel.Selector(req.text)
        a_href_list = selector.css(
            '#fengtui > div > div > div.bookinfo > h4 > a::attr(href)'
        ).getall()

        for a in a_href_list:
            url_c = base_url + a
            try:
                xiaoshuo = requests.get(url_c, timeout=timeout)
                xiaoshuo.encoding = 'utf-8'
                selector_2 = parsel.Selector(xiaoshuo.text)
                # Locate the "download TXT" button on the detail page.
                btn = selector_2.css(
                    'body > div.container > div.content > div:nth-child(2) > div.bookinfo > div > '
                    'a:nth-child(3)::attr(href)'
                ).getall()
                if not btn:
                    # Page layout differs or the novel has no TXT download;
                    # skip instead of raising IndexError on btn[0].
                    print(url_c, '未找到下载链接,已跳过')
                    continue

                download_url = f'{base_url}{btn[0]}'
                # The file name comes from the remote URL; keep only the
                # final path component so it cannot escape out_dir.
                title = os.path.basename(download_url.split('=')[-1]) + '.txt'
                download = requests.get(download_url, timeout=timeout).content
            except requests.RequestException as exc:
                # One unreachable novel must not abort the whole crawl.
                print(url_c, '下载失败:', exc)
                continue

            with open(os.path.join(out_dir, title), mode='wb') as f:
                f.write(download)
            print(title, '下载完成')
if __name__ == '__main__':
    # Script entry point: ask for a category and crawl it only when the
    # category is one of the known names in NOVEL_CLASS.
    type_name = input("请输入你要爬取的小说类型:").strip()
    if type_name not in NOVEL_CLASS:
        print('类型错误')
    else:
        # Previously start() was called even after reporting an invalid
        # category; it now runs only for valid input.
        start(type_name)