知轩藏书 (zxcs.me) is a pretty good site for carefully proofread novels, but it gets blocked from time to time, so I made a download tool to grab everything it already has.
The script is written in Python 3.7.1. I couldn't be bothered to write a GUI, so I'm just posting the script directly.
When run, it downloads the books whose IDs fall between C_IndexMin and C_IndexMax. The current C_IndexMax of 30000 is plenty; the highest ID on the site is only around 12000.
Files are saved to \Download\知轩藏书\ under the script's working directory.
If you stop it halfway and run it again, it starts over from the beginning, but books that have already been downloaded are verified by file size and skipped if they are intact. (A network hiccup at the wrong moment can leave a download broken, which is what the check is for.)
A 404 just means there is no book with that ID; that's normal, ignore it.
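For reference, every book that downloads successfully gets one entry in List.json in the download directory, keyed by its ID. Based on the fields the script stores, an entry looks roughly like this (the values below are only placeholders):

{
    "1090": {
        "Name": "book title",
        "Author": "author name",
        "Sort": "category",
        "Tag": "tag",
        "Desc": "book description...",
        "File": "1090.rar",
        "Size": 1048576
    }
}

This is also the file the script consults on restart to decide which IDs it can skip.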
Quite a few people have reported the error 'NoneType' object has no attribute 'group', but I've never hit it myself... maybe the site was acting up on certain days?
If you know how, set a breakpoint when it happens and see which pattern is failing to match.
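If you would rather be told which pattern failed instead of getting that bare AttributeError, one option is a small wrapper around re.search. This is only a sketch, MatchOrFail is not part of the script below, and you would have to swap it in yourself:

import re

def MatchOrFail(pattern, text, what):
    '''re.search that raises a descriptive error instead of returning None,
    so a malformed page reports what is missing rather than
    "'NoneType' object has no attribute 'group'".'''
    m = re.search(pattern, text)
    if m is None:
        raise Exception('%s not found on this page' % what)
    return m.group(0)

# for example, instead of
#   lTitle = re.compile(r'(?<=<h1>).+?(?=</h1>)').search(lHTMLClass.group(0)).group(0)
# you would write
#   lTitle = MatchOrFail(r'(?<=<h1>).+?(?=</h1>)', lHTMLClass.group(0), 'title')

The existing try/except around the parsing code would then print which field was missing for that ID instead of the NoneType message.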
I wrote it in PyCharm, but it also runs fine in the IDLE that ships with Python; the only drawback there is that the download progress line can't overwrite itself, which looks a bit ugly.
Running it from PyCharm doesn't have that problem.
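If the non-overwriting progress output in IDLE bothers you, one possible tweak is to use the carriage-return trick only when stdout is a real terminal. This is just a sketch of a drop-in variant of the DownloadProgress function from the script below, not something the original does:

import sys

def DownloadProgress(blocknum, bs, size):
    # percentage of the remote file fetched so far, clamped to 100
    per = min(100.0, 100.0 * blocknum * bs / size) if size > 0 else 0.0
    if sys.stdout.isatty():
        # a real console understands '\r', so overwrite the same line
        sys.stdout.write('Downloading: %.2f%%\r' % per)
        sys.stdout.flush()
    elif per >= 100.0:
        # IDLE's shell does not, so just print once when the download finishes
        print('Downloading: 100%')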
[Python]

import os
import re
import urllib.request
import contextlib
import json
import sys
# 吾爱破解论坛 - 黑暗煎饼果子
lWorkPath = os.getcwd() + '\\Download\\知轩藏书\\'
if not os.path.exists(lWorkPath):
    os.makedirs(lWorkPath)
lBooksPath = lWorkPath + 'Books\\'
if not os.path.exists(lBooksPath):
    os.makedirs(lBooksPath)
C_URLInfo = r'http://www.zxcs.me/post/%d'
C_URLDownload = r'http://www.zxcs.me/download.php?id=%d'
C_IndexMin = 1090  # 1090 is the lowest book ID on 知轩藏书
C_IndexMax = 30000
lURLOpener = urllib.request.build_opener(urllib.request.HTTPHandler)
urllib.request.install_opener(lURLOpener)
lURLOpener.addheaders = [
    ('Host', 'www.zxcs.me'),  # the Host header carries just the host name, without the scheme
    ('Connection', 'keep-alive'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'),
]
def DownloadProgress(blocknum, bs, size):
    '''
    blocknum: number of blocks transferred so far
    bs: block size
    size: total size of the remote file
    '''
    per = 100.0 * blocknum * bs / size
    if per > 100:
        per = 100
    sys.stdout.write('Downloading: %.2f%%' % per)
    sys.stdout.write('\r')
    sys.stdout.flush()
lJSONFile = lWorkPath + 'List.json'
if os.path.exists(lJSONFile):
    with open(lJSONFile, 'r', encoding='utf-8') as f:
        lJBooks = json.load(f)
else:
    lJBooks = {}
for lIndex in range(C_IndexMin, C_IndexMax + 1):
    print()
    lID = str(lIndex)
    lJBook = lJBooks.get(lID, {})
    lFileName = lJBook.get('File', '')
    if (lFileName != '') and os.path.exists(lBooksPath + lFileName):
        # already downloaded: record its size if missing, then skip it
        if 'Size' not in lJBook:
            lJBook['Size'] = os.path.getsize(lBooksPath + lFileName)
            lJBooks[lID] = lJBook
            with open(lJSONFile, 'w', encoding='utf-8') as f:
                json.dump(lJBooks, f, ensure_ascii=False)
        print('%d already exists' % lIndex)
        print()
        continue
    # fetch the book's info page
    try:
        lHTML = lURLOpener.open(C_URLInfo % lIndex).read().decode('utf-8')
        # the info block: <div id="content">
        lHTMLClass = re.compile(r'(?<=<div id="content">)[^\f]+?(?=</div>)').search(lHTML)
        if lHTMLClass:
            # title line
            lTitle = re.compile(r'(?<=<h1>).+?(?=</h1>)').search(lHTMLClass.group(0)).group(0)
            # book name
            lJBook['Name'] = re.compile(r'(?<=《).+?(?=》)').search(lTitle).group(0)
            # author
            lJBook['Author'] = re.compile(r'(?<=作者：).+').search(lTitle).group(0)
            # category
            # lJBook['Sort'] = re.compile(r'(?<=<a >).+?(?=</a>)').search(lHTMLClass.group(0)).group(0)
            # the category is the first <a ...> link inside the content block
            lTempStr = re.compile(r'<a[^>]*>.+?</a>').search(lHTMLClass.group(0)).group(0)
            lJBook['Sort'] = re.compile(r'(?<=>).+(?=<)').search(lTempStr).group(0)
            # tag
            # lJBook['Tag'] = re.compile(r'(?<=<a href="http://www.zxcs.me/tag/[^\s>]+?">).+?(?=</a>)').search(lHTMLClass.group(0)).group(0)
            lTempStr = re.compile(r'<a href="http://www.zxcs.me/tag/[^\s>]+?">.+?</a>').search(lHTMLClass.group(0)).group(0)
            lJBook['Tag'] = re.compile(r'(?<=>).+(?=<)').search(lTempStr).group(0)
            # description: strip <br /> tags and &nbsp; entities
            lJBook['Desc'] = re.compile(r'(?<=【内容简介】：)[^\f\v]+?(?=</p>)').search(lHTML).group(0).replace('<br />', '').replace('&nbsp;', ' ')
    except Exception as e:
        print('[%d] failed to get book info: %s' % (lIndex, e))
        continue
    # fetch the book file
    try:
        # download page
        lHTML = lURLOpener.open(C_URLDownload % lIndex).read().decode('utf-8')
        # list of mirror download URLs
        lDownloadURLList = re.compile(r'(?<=<span class="downfile"><a href=").+?(?=")').findall(lHTML)
        if not lDownloadURLList:
            raise Exception('no download links found')
    except Exception as E:
        print(E)
        continue
    try:
        lDownloaded = False
        lErrors = []
        for lDownloadURL in lDownloadURLList:
            try:
                lFileExt = re.compile(r'\.[^\./]+$').search(lDownloadURL).group(0)
                lFileName = lID + lFileExt
                lJBook['File'] = lFileName
                # ask the server for the expected file size first
                lFileSize = 0
                try:
                    with contextlib.closing(urllib.request.urlopen(lDownloadURL, None)) as UR:
                        lHeaders = UR.info()
                        lFileSize = int(lHeaders['Content-Length'])
                except Exception as E:
                    lFileSize = 0
                    print('failed to get the file size: %s' % E)
                # download
                urllib.request.urlretrieve(lDownloadURL, lBooksPath + lFileName, DownloadProgress)
                # verify the size of the downloaded file
                lRFileSize = int(os.path.getsize(lBooksPath + lFileName))
                if lRFileSize != lFileSize:
                    raise Exception('file size mismatch')
                lJBook['Size'] = lFileSize
                lDownloaded = True
                break
            except Exception as E:
                lErrors.append(E)
        if lDownloaded:
            lJBooks[lID] = lJBook
            with open(lJSONFile, 'w', encoding='utf-8') as f:
                json.dump(lJBooks, f, ensure_ascii=False)
            print('[%d] download finished%s' % (lIndex, ' ' * 10))
        else:
            # every mirror failed: drop the entry from the json
            if lID in lJBooks:
                del lJBooks[lID]
            raise Exception(lErrors)
    except Exception as E:
        if (lFileName != '') and os.path.exists(lBooksPath + lFileName):
            os.remove(lBooksPath + lFileName)
        print('[%d] download failed: %s' % (lIndex, E))
input('Press Enter to exit')