Qidian novel downloader (beginner practice): search and download
import requests
import os
from lxml import etree
from urllib.parse import quote
from urllib import parse

name = input("Enter the name of the novel to download: ")
name1 = name.encode('utf-8')          # not used below; quote() already encodes as UTF-8
name2 = quote(name)                   # Qidian URL-encodes the search keyword
url = f'https://www.qidian.com/search?kw={name2}'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}

def page_get(url):
    rep1 = requests.get(url=url, headers=headers)
    rep1.encoding = "UTF-8"
    tree = etree.HTML(rep1.text)
    return tree

if not os.path.exists('./qidian_book'):
    os.mkdir('./qidian_book')

tree = page_get(url)
# First search result -> detail page URL
url_1 = tree.xpath('//div[@class="book-mid-info"]/h4/a/@href')[0]
book_url = parse.urljoin('https://book.qidian.com/', url_1)
# print(book_url)
tree2 = page_get(book_url)
# Title and introduction
# book_title = tree2.xpath('/html/body/div/div/div/div/h1/em')
book_info = tree2.xpath('/html/body/div/div/div/div/p')
# Chapter list: each <li> carries one chapter name and its URL
li_list = tree2.xpath('//*[@id="j-catalogWrap"]/div/div/ul/li')
for li in li_list:
    book_menu = li.xpath('./a/text()')
    detail_url = li.xpath('./a/@href')[0]            # protocol-relative href, e.g. //read.qidian.com/...
    detail_url2 = parse.urljoin("https://", detail_url)
    # print(detail_url2)
    # Download the chapter content
    tree3 = page_get(detail_url2)                    # this is where it breaks -- any idea why, folks?
    book_title2 = "".join(tree3.xpath('//h3[@class="j_chapterName"]/span/text()'))
    book_content = tree3.xpath('//div[@class="main-text-wrap "]/div/p/text()')
    book_content2 = "\n".join(p.strip() for p in book_content)   # strip whitespace, join into one string
    path = './qidian_book/' + name + ".txt"
    with open(path, "a", encoding="utf-8") as pf:
        pf.write(book_title2 + "\n")
        pf.write(book_content2 + "\n")
    print(book_title2, "saved")
print(name, "download finished")
Link: https://pan.baidu.com/s/1nlDrneJ-2WbCx_zBn-Lc8Q
Extraction code: 1234
This is my first attempt at encapsulation as a beginner.

lhm33204619 (2021-6-5 18:36): How do I use this? I don't understand, can you explain?

Reply to lhm33204619: Install Python, import the few modules the script uses, and run it (a minimal setup sketch follows).
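To expand on that a little, a setup sketch, assuming pip is available and using qidian_novel.py as a placeholder file name (the original post never names the script):

pip install requests lxml
python qidian_novel.py

requests and lxml are the only third-party packages the script imports; os and urllib ship with the standard library.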
lihu5841314 (2021-6-5 19:22), replying to lhm33204619: I'm a beginner too; I haven't learned about encapsulation yet.

Reply to lihu5841314: This can't download VIP chapters either, so it isn't that useful; it's mainly for discussion.

Thanks for sharing.

bigdawn (2021-6-5 22:57): I thought it could download VIP chapters.

Reply to bigdawn: A beginner is nowhere near that level {:1_896:}

Thanks for sharing the source code!

Thanks to the OP for sharing, showing my support!