使用py自带库_自己追更的小说下载脚本

onepc 发表于 2022-11-15 13:30

使用ini配置，支持接着下载，适合网上没有txt下载的刚发的小说并喜欢下载到手机看txt的这种场景
代码太差，请轻喷。

#!/usr/bin/env python
#coding=utf8

import urllib.request,re,os,io,gzip,sys,configparser,time

# Directory that holds the running script/executable. The ini file, the log
# file and the download folder are all looked up next to it.
# sys.argv is a list, so the script path has to be taken from element 0.
RootDir = os.path.dirname(sys.argv[0])

# Script base name with a trailing ".exe" or ".py" removed (case-insensitive);
# used as the stem for the ini and log file names.
TmpName = re.sub(r'\.exe$|\.py$', '', os.path.basename(sys.argv[0]), flags=re.I)

# "<script name>.ini" beside the script: per-site rules and per-book progress.
ConfigFile = os.path.join(RootDir, '%s.ini' % TmpName)
# Folder that receives the downloaded "<book title>.txt" files.
XsDir = os.path.join(RootDir, 'download')

# "<script name>.log" beside the script; written by printmsg().
XsLogFile = os.path.join(RootDir, '%s.log' % TmpName)

# Rule table filled at start-up from the ini's "regular" section.
D_rule = {}
# Section names that hold settings rather than book lists.
L_cfg = ['config', 'regular']

# Browser-like User-Agent sent with every HTTP request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',}

def ungzip(data):
    """Return *data* gunzipped if it is a gzip stream, otherwise unchanged.

    This is a best-effort helper: a payload that is not gzip, or that is
    truncated or corrupt, is handed back as-is instead of raising.
    """
    # Function-scope import: zlib is not on the file's top-level import line.
    import zlib

    try:
        return gzip.decompress(data)
    except (OSError, EOFError, zlib.error):
        # OSError covers "not a gzipped file", EOFError a truncated stream,
        # zlib.error a corrupt deflate body.
        return data

#def geturldata(url,headers,code):
#src=''
#try:
# request = urllib.request.Request(url=url,headers=headers)
# response = urllib.request.urlopen(request)
# src = ungzip(response.read()).decode(code)
#except urllib.error.HTTPError:
# print ('%s 网站无法打开.'%url)
#except urllib.error.URLError:
# print ('%s URL异常.'%url)
#except:
# print('异常无法访问 %s' % url)
##finally:
##response.close()
#return src

def writexs(con,xsrc,code,title,section):
    """Append one chapter (heading line + cleaned body) to the book file.

    con     -- heading text written on its own line before the body
    xsrc    -- raw HTML fragment holding the chapter text
    code    -- text encoding used for the output file
    title   -- output file name; the file lives inside the global XsDir
    section -- ini section name of the site; for 'bkneng' the <p>/</p> tags
               become newlines, for every other site they are dropped
    """
    paragraph_sub = '\n' if section == 'bkneng' else ''

    # NOTE(review): the pasted code read replace(' ',' '), which is a no-op.
    # It looks like an "&nbsp;" entity (or a literal no-break space) that was
    # decoded when the script was posted, so both spellings are normalised to
    # a plain space here -- confirm against the author's original file.
    xsrc = xsrc.replace('&nbsp;', ' ').replace('\xa0', ' ')
    for br_tag in ('<br/>', '<br />', '<br>'):
        xsrc = xsrc.replace(br_tag, '')
    xsrc = xsrc.replace('<p>', paragraph_sub).replace('</p>', paragraph_sub)
    xsrc = xsrc.replace('\r', '')

    # Mode 'a' creates the file when it does not exist yet, so the separate
    # "file exists" / "new file" branches are unnecessary.
    with open(os.path.join(XsDir, title), 'a', encoding=code) as f:
        f.write('%s\n%s\n' % (con, xsrc))


def getxs(link,cont,code,r_content,title,section):
    """Download one chapter page and append its text to the book file.

    link      -- absolute URL of the chapter page
    cont      -- chapter heading, passed through to writexs()
    code      -- character encoding of the page (also used for the output)
    r_content -- regex whose first match is the chapter body
                 (matched with re.S so '.' spans newlines)
    title     -- output file name, passed through to writexs()
    section   -- ini section name of the site, passed through to writexs()

    Nothing is written when r_content does not match the page. Network and
    decoding errors propagate to the caller.
    """
    rq = urllib.request.Request(link, headers=headers)
    # The context manager closes the response even if read()/decode() raises.
    with urllib.request.urlopen(rq) as resp:
        contsrc = ungzip(resp.read()).decode(code)

    contxs = re.findall(r_content, contsrc, re.S)
    if contxs:
        # findall() returns a list; writexs() needs the matched text itself,
        # so pass the first match rather than the whole list.
        # NOTE(review): assumes r_content has at most one capture group, as
        # in the sample ini; with more groups the match would be a tuple.
        writexs(cont, contxs[0], code, title, section)

def printmsg(a1,a2,a3,logflag):
    """Append a timestamped "a1 a2 a3" line to the log file.

    a1, a2, a3 -- values written after the timestamp, space separated
    logflag    -- logging switch; a line is written only when it equals 'Y'

    The line goes to the global XsLogFile, opened in append mode as UTF-8.
    """
    if logflag != 'Y':
        return
    stamp = time.strftime('[%Y-%m-%d %H:%M:%S]', time.localtime())
    with open(XsLogFile, 'a', encoding='utf-8') as f:
        f.write('%s %s %s %s\n' % (stamp, a1, a2, a3))

# Prefix put in front of each chapter href, keyed by ini section name.
# Sections not listed here use the matched href unchanged.
_SITE_PREFIXES = {
    'biququ': 'https://www.biququ.com/',
    'bkneng': 'https://wenxue.bkneng.com',
    'xibiquge': 'http://www.xibiquge.com',
}


def downxs(urllist,code,r_title,r_url,r_content,objcfg,section,logflag):
    """Download the not-yet-fetched chapters of every book in one ini section.

    urllist   -- (option, value) pairs of the section; the option is the
                 book label and the value is "index_url,chapters_done"
    code      -- character encoding of the site's pages
    r_title   -- regex whose first match on the index page is the book title
    r_url     -- regex yielding (href, chapter_title) pairs for the chapters
    r_content -- regex for the chapter body, passed to getxs()
    objcfg    -- ConfigParser holding the ini; progress is stored in it
    section   -- ini section name of the site
    logflag   -- logging switch, passed to printmsg()

    After each chapter the "chapters_done" counter is updated and the ini is
    rewritten, so an interrupted run resumes at the next chapter. Network and
    decoding errors propagate to the caller.
    """
    prefix = _SITE_PREFIXES.get(section, '')
    tmpurl_key = '%s_tmpurl' % section

    for urlseq in urllist:
        # urlseq is a (label, "url,count") pair, so index before using str
        # methods on it.
        desc = urlseq[0].strip()
        fields = urlseq[1].split(',')
        url = fields[0].strip()
        # A missing or empty counter means "nothing downloaded yet".
        if len(fields) > 1 and fields[1].strip():
            seqnum = int(fields[1].strip())
        else:
            seqnum = 0

        request = urllib.request.Request(url=url, headers=headers)
        # The context manager closes the response even on read/decode errors.
        with urllib.request.urlopen(request) as response:
            src = ungzip(response.read()).decode(code)
        if not src:
            continue

        shuname = re.findall(r_title, src)
        if not shuname:
            print ('%s 获取不了标题' % desc)
            continue
        bookname = shuname[0]
        title = '%s.txt' % bookname

        if tmpurl_key in D_rule:
            # Optional "<section>_tmpurl" rule: first narrow the page down to
            # the chapter-list fragment, then look for chapter links in it.
            xs_1 = re.findall(D_rule[tmpurl_key], src, re.S)
            if not xs_1:
                print ('%s 获取不了章节' % desc)
                continue
            xs = re.findall(r_url, xs_1[0])
        else:
            xs = re.findall(r_url, src)
        if not xs:
            continue

        currsj = time.strftime('[%Y-%m-%d %H:%M:%S]', time.localtime())
        print ('%s %s 共%s章,正从第%s章开始下载...' % (currsj, bookname, str(len(xs)), str(seqnum + 1)))

        kk = 0
        # NOTE(review): assumes r_url has exactly two capture groups
        # (href, chapter title), as every rule in the sample ini does.
        for kk, (href, chapter) in enumerate(xs, 1):
            if kk <= seqnum:
                # Already downloaded in an earlier run.
                continue
            link = '%s%s' % (prefix, href)
            printmsg(kk, link, chapter, logflag)
            getxs(link, chapter, code, r_content, title, section)
            # Persist progress after every chapter so a crash loses nothing.
            objcfg.set(section, desc, '%s,%s' % (url, str(kk)))
            with open(ConfigFile, 'w', encoding='utf-8') as f:
                objcfg.write(f)

        currsj = time.strftime('[%Y-%m-%d %H:%M:%S]', time.localtime())
        # Clamp at zero: the stored counter may exceed the chapters found.
        print ('%s %s 下载完成,共更新%s章!' % (currsj, bookname, str(max(kk - seqnum, 0))))
        print ('-'*50)

# Script entry point: load the ini next to the script, then download the
# pending chapters for every site section that has a complete rule set.
if __name__ == '__main__':
    if not os.path.isfile(ConfigFile):
        print('%s 配置文件不存在.' % ConfigFile)
        sys.exit()

    if not os.path.exists(XsDir):
        os.mkdir(XsDir)

    objconf = configparser.ConfigParser()
    objconf.read(ConfigFile, encoding="utf8")
    l_rule = objconf.items("regular")

    logflag = objconf.get('config', 'logflag')

    # items() yields (option, value) pairs; store them as
    # D_rule[option] = value so rules can be looked up by name.
    for rule_name, rule_value in l_rule:
        D_rule[rule_name] = rule_value

    required_rules = ('title', 'url', 'content', 'code')
    for x in objconf.sections():
        if x in L_cfg:
            # Settings sections, not book lists.
            continue
        l_x = objconf.items(x)
        # A site section is processed only when it lists at least one book
        # and all four "<section>_<rule>" entries exist.
        if len(l_x) > 0 and all('%s_%s' % (x, rule) in D_rule for rule in required_rules):
            downxs(l_x, D_rule['%s_code' % x], D_rule['%s_title' % x], D_rule['%s_url' % x], D_rule['%s_content' % x], objconf, x, logflag)

ini配置文件：

[config]
logflag = Y

[regular]
imayitxt_title = <h1 class="page-title ar_titled">(.*)?</h1>
imayitxt_url = <a href="(.*)" class="name">(.*)</a>
imayitxt_content = <div class="page-content " id="ChapterContents">(.*)</div>.*</div></div><div class="ft"><script>
imayitxt_code = utf-8
biququ_title = <dt>(.*)?全部章节</dt>
biququ_url = <dd><a href="(.*)">(.*)</a></dd>
biququ_content = <div id="content">\s*<div class="read_tj">.*?</div>(.*)<script>chaptererror;</script>
biququ_code = utf-8
bkneng_title = <h2 class="left">(.*)?</h2>
bkneng_url = <a href="(.*)" title="(.*)">
bkneng_content = <div class="myContent" flag="1" style="position: relative">(.*?)</div>
bkneng_code = utf-8
xibiquge_title = <dt>《(.*)?》正文</dt>
xibiquge_tmpurl = 正文</dt>(.*)</dl>
xibiquge_url = <dd><a href="(.*)">(.*)</a></dd>
xibiquge_content = <div id="content">(.*?)</div>
xibiquge_code = gbk

[xibiquge]
西游：瞎眼五百年，弟子全是大妖 = http://www.xibiquge.com/33_33623/,189
深海余烬 = http://www.xibiquge.com/28_28931/,227
低调在修仙世界 = http://www.xibiquge.com/19_19514/,211

[imayitxt]
打工先知 = http://www.imayitxt.com/showclist/162715.html,292
开局失业，我让歌坛大魔王回归 = http://www.imayitxt.com/showclist/166684.html,261
我在异界肝经验 = http://www.imayitxt.com/showclist/161789.html,348
修炼从简化功法开始 = http://www.imayitxt.com/showclist/164116.html,334

wangwlex1988 发表于 2022-11-15 14:53

学习着，有用

8359 发表于 2022-11-15 15:11

感谢分享

heimaoct 发表于 2022-11-15 15:56

先收藏慢慢学习

qxlsl 发表于 2022-11-15 16:47

好东西，学到了

qianseshitou 发表于 2022-11-15 17:04

感谢无私分享

monoegod 发表于 2022-11-16 10:26

有点东西

qfxldhw 发表于 2022-11-16 13:10

有点东西

ERMU 发表于 2023-7-18 08:50

学习学习

lingwushexi 发表于 2023-7-18 08:59

先收藏慢慢学习

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

使用py自带库_自己追更的小说下载脚本