本帖最后由 lcxxhp 于 2021-2-1 11:23 编辑
仅供学习参考使用,严禁用于商业用途!
附上可执行的 Python 源码,仅供交流使用!
截止发稿 2021-2-1 可用
[Python] 纯文本查看 复制代码
# -*- coding: utf-8 -*-
"""Created on Wed Nov 4 13:49:37 2020
@author: Administrator
"""
import requests
import re
import time
# Scraper for novels hosted on www.00kxs.com.
#
# Given the URL of a novel's table-of-contents page
# (for example http://www.00kxs.com/html/4/4918/), the script downloads every
# listed chapter and appends the chapter titles and texts to "<book title>.txt"
# in the current working directory.

# Browser-like request headers sent with every HTTP request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}
# Prefix that chapter paths taken from the table of contents are appended to.
downurl = 'http://www.00kxs.com/html/'
# Encoding forced onto every response before its text is decoded.
PAGE_ENCODING = 'GB2312'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30
# After every PAUSE_EVERY chapters, sleep PAUSE_SECONDS so the server does not
# drop the connection.
PAUSE_EVERY = 250
PAUSE_SECONDS = 20


def fetch_page(url):
    """Download *url* and return its body decoded with PAGE_ENCODING.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
    """
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail here with a clear HTTP error rather than later with a parse error.
    response.raise_for_status()
    response.encoding = PAGE_ENCODING
    return response.text


def extract_book_title(index_html):
    """Return the book name from the ``og:novel:book_name`` meta tag.

    Raises:
        ValueError: if the tag is not present in *index_html*.
    """
    match = re.search(
        r'meta property="og:novel:book_name" content="(.*?)"/>',
        index_html,
        re.S,
    )
    if match is None:
        raise ValueError('book title not found; is this a table-of-contents page?')
    return match.group(1)


def extract_chapter_links(index_html):
    """Return ``(relative_path, chapter_name)`` tuples from the chapter list.

    The links are read from the second ``<div class="volume">`` section of
    the page; the first one is skipped (presumably a "latest chapters"
    preview -- verify against the live site).

    Raises:
        ValueError: if the page has fewer than two such sections.
    """
    volumes = re.findall(r'<div class="volume">(.*?)</ul>', index_html, re.S)
    if len(volumes) < 2:
        raise ValueError('chapter list not found; is this a table-of-contents page?')
    return re.findall(r'<a href="/html/(.*?)">(.*?)</a>', volumes[1])


def clean_chapter_html(chapter_html):
    """Return the text of the ``<div id="content">`` element without <p> tags.

    Raises:
        ValueError: if *chapter_html* has no such element.
    """
    blocks = re.findall(r'<div id="content">(.*?)</div>', chapter_html, re.S)
    if not blocks:
        raise ValueError('chapter content not found in page')
    return blocks[0].replace('<p>', '').replace('</p>', '')


def safe_filename(title):
    """Return *title* with characters that are illegal in file names replaced by ``_``."""
    return re.sub(r'[\\/:*?"<>|]', '_', title).strip()


def main():
    """Ask for a table-of-contents URL and download the whole novel."""
    print('本脚本仅适用于00kxs看书网:https://www.00kxs.com/')
    # Pasted URLs often carry stray whitespace or a trailing newline.
    index_url = input('粘贴小说目录url,必须是小说目录,小说首页不支持\n').strip()

    index_html = fetch_page(index_url)
    book_title = extract_book_title(index_html)
    chapters = extract_chapter_links(index_html)

    # Append mode is kept from the original script: re-running the download
    # adds to an existing file instead of overwriting it.
    with open('%s.txt' % safe_filename(book_title), 'a', encoding='utf-8') as out:
        for count, (chapter_path, chapter_name) in enumerate(chapters, start=1):
            chapter_text = clean_chapter_html(fetch_page(downurl + chapter_path))
            print(chapter_name)
            if count % PAUSE_EVERY == 0:
                time.sleep(PAUSE_SECONDS)
            out.write(chapter_name + '\n')
            out.write(chapter_text + '\n')
            out.write('\n')
    print('下载完成')


if __name__ == '__main__':
    main()
|