本帖最后由 lihu5841314 于 2021-6-5 19:43 编辑
[Asm] 纯文本查看 复制代码
import requests
import os
from lxml import etree
from urllib.parse import quote
from urllib import parse
# Ask which novel to download; strip stray whitespace so it does not leak
# into the search query or the output file name.
name = input("请输入需要下载的小说:").strip()
if not name:
    # An empty keyword cannot identify a book, so stop before any request.
    raise SystemExit("小说名不能为空")
# Percent-encode the title so it can be placed in the search URL's query.
name2 = quote(name)
url = f'https://www.qidian.com/search?kw={name2}'
# Browser-like User-Agent header passed to requests by page_get().
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}
def page_get(url):
    """Fetch *url* and return the page parsed as an lxml HTML tree.

    The request is sent with the module-level ``headers`` and the response
    body is decoded as UTF-8 regardless of what the server declares.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The root element produced by ``etree.HTML`` from the response text.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with a 4xx/5xx status.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    rep1 = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # XPath queries that consume the returned tree.
    rep1.raise_for_status()
    rep1.encoding = "UTF-8"
    return etree.HTML(rep1.text)
# Create the output directory; exist_ok avoids the check-then-create race.
os.makedirs('./qidian_book', exist_ok=True)

# Search results page: the first hit is taken as the book to download.
tree = page_get(url)
search_hits = tree.xpath('//div[@class="book-mid-info"]/h4/a/@href')
if not search_hits:
    # Indexing an empty result list would raise a bare IndexError.
    raise SystemExit(f"没有找到小说: {name}")
url_1 = search_hits[0]
# Resolve the (possibly scheme-less) href into the book detail page URL.
book_url = parse.urljoin('https://book.qidian.com/', url_1)

tree2 = page_get(book_url)
# Synopsis node. The absolute XPath depends on the exact page layout, so a
# miss is tolerated rather than aborting the download.
info_nodes = tree2.xpath('/html/body/div/div[6]/div[1]/div[2]/p[2]')
book_info = info_nodes[0] if info_nodes else None

# <li> entries of the chapter catalogue; each one links to a chapter page.
li_list = tree2.xpath('//*[@id="j-catalogWrap"]/div[2]/div[1]/ul/li')
if not li_list:
    # NOTE(review): an empty list may mean the catalogue is loaded by
    # script after page load -- confirm against the live page.
    raise SystemExit(f"没有找到章节目录: {book_url}")

# The output file is the same for every chapter, so build the path once.
path = './qidian_book/' + name + ".txt"

for li in li_list:
    link_texts = li.xpath('./a/text()')
    hrefs = li.xpath('./a/@href')
    if not hrefs:
        # A catalogue entry without a link has nothing to download.
        continue
    book_menu = link_texts[0] if link_texts else ""
    detail_url = hrefs[0]
    # Join against the book page URL: a base with no host ("https://")
    # turns a host-relative href into an invalid "https:///..." URL.
    detail_url2 = parse.urljoin(book_url, detail_url)

    # Download the chapter page and extract its title and paragraphs.
    tree3 = page_get(detail_url2)
    titles = tree3.xpath('//h3[@class="j_chapterName"]/span[1]/text()')
    # Fall back to the catalogue link text when the page has no title node.
    book_title2 = titles[0] if titles else book_menu
    book_content = tree3.xpath('//div[@class="main-text-wrap "]/div[2]/p/text()')
    # Strip surrounding whitespace from each paragraph and merge them.
    book_content2 = "".join(x.strip() for x in book_content)

    # Append per chapter so finished chapters survive a later failure.
    with open(path, "a", encoding="utf-8") as pf:
        pf.write(book_title2 + "\n")
        # The original `++"\n"` applied unary plus to a str (TypeError).
        pf.write(book_content2 + "\n")
    print(book_title2, "下载完毕")

print(name, "下载完毕")
链接:https://pan.baidu.com/s/1nlDrneJ-2WbCx_zBn-Lc8Q
提取码:1234
小白第一次封装 |