监控小说更新-自动推送

18251370925 发表于 2018-12-26 14:16

本帖最后由 18251370925 于 2018-12-26 14:19 编辑

在吾爱论坛已经好多年了，第一次发这种源码帖
如有不足，欢迎指教

https://static.52pojie.cn/static/image/hrline/1.gif
这个小说监控爬虫是我自己一直在用，挂在服务器上，24小时运行，每隔5分钟检测一下是否更新章节，
如果检测到更新，会自动爬取正文，通过server酱推送到微信上，可以推送单人，也可以推送一群！

https://static.52pojie.cn/static/image/hrline/1.gif

import datetime
import logging
import os

import requests,re
from bs4 import BeautifulSoup
from logging.handlers import TimedRotatingFileHandler

import smtplib,time
from email.mime.text import MIMEText
from email.utils import formataddr

# NOTE: every long run of asterisks in this file is a private value that is not
# published; apply for your own (free) ServerChan key -- search for "server酱".
my_sender = '****'  # sender's e-mail account
my_pass = '**********'  # sender's e-mail password; apply for your own
my_user = '****@qq.com'  # recipient's e-mail account (here: sending to myself)

def mail(xinxi):
    """Send an "<xinxi> updated" notification e-mail through QQ mail.

    This was the original notification channel; it is no longer used by the
    script and is kept for anyone who still wants e-mail alerts.

    Args:
        xinxi: Text placed in front of "更新了" in the subject line.

    Returns:
        True when the message was handed to the SMTP server, False when
        anything went wrong (connection, login or sending).
    """
    try:
        msg = MIMEText('填写邮件内容', 'plain', 'utf-8')
        # formataddr takes [display name, e-mail address].
        msg['From'] = formataddr(["发件人昵称", my_sender])
        msg['To'] = formataddr(["收件人昵称", my_user])
        msg['Subject'] = "%s更新了" % xinxi

        # The context manager sends QUIT and closes the socket even when
        # login() or sendmail() raises, so the connection cannot leak.
        with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
            server.login(my_sender, my_pass)
            # The recipient list was missing here (a syntax error before).
            server.sendmail(my_sender, [my_user], msg.as_string())
    except Exception:
        # Deliberate best effort: callers only get a success flag.
        return False
    return True

def get_nzuixin():
    """Fetch the newest chapter listing of 逆天邪神.

    Every ``<li>`` of the index page is scanned; entries whose link contains
    ``juan.html`` are ignored and scanning stops at the entry named "序章".
    The first run of digits in an entry's text is taken as its chapter number.

    Returns:
        A tuple ``(zuixin, gengxin, link)``: the highest chapter number found,
        a dict mapping chapter number to title, and a dict mapping chapter
        number to the chapter's href.

    The function does not give up: after any failure it logs an error, waits
    60 seconds and tries again.  The retry is a loop rather than the earlier
    recursive call, whose result was discarded (so ``None`` was returned after
    the first failure).
    """
    url = r"http://www.nitianxieshen.com/"
    while True:
        try:
            a = requests.get(url, timeout=60)
            a.encoding = "utf-8"
            soup = BeautifulSoup(a.text, 'html.parser')
            gengxin = {}
            link = {}
            for i in soup.find_all('li'):
                anchor = i.find('a')
                if anchor is None:
                    # An <li> without a link cannot be a chapter entry.
                    continue
                href = anchor.get('href')
                if href is None or 'juan.html' in href:
                    continue
                name = anchor.get_text()
                if name == "序章":
                    break
                zhangjie = re.search(r'\d+', name)
                if zhangjie:
                    number = int(zhangjie.group())
                    gengxin[number] = name
                    link[number] = href
            # max() raises ValueError when nothing was parsed, which is
            # treated like any other failure and retried below.
            return max(gengxin), gengxin, link
        except Exception:
            now = time.strftime("%Y-%m-%d %X", time.localtime())
            logger.error("%s获取逆天邪神最新章节报错" % now)
            time.sleep(60)
def get_qzuixin():
    """Fetch the newest chapter listing of 全职法师.

    Only the first nine ``<dd>`` entries of the index page are inspected
    (presumably the site's "latest chapters" box -- verify against the page).
    The first run of digits in an entry's text is taken as its chapter number.

    Returns:
        A tuple ``(qzuixin, gengxin, link)``: the highest chapter number found,
        a dict mapping chapter number to title, and a dict mapping chapter
        number to the chapter's href.

    The function does not give up: after any failure it logs an error, waits
    60 seconds and tries again.  The retry is a loop rather than the earlier
    recursive call, whose result was discarded (so ``None`` was returned after
    the first failure).
    """
    quanzhi_url = r"https://www.biquge5200.cc/2_2599/"
    while True:
        try:
            quanzhi = requests.get(quanzhi_url, timeout=60)
            quanzhi.encoding = "gbk"
            soup = BeautifulSoup(quanzhi.text, "html.parser")
            gengxin = {}
            link = {}
            for v in soup.find_all('dd')[:9]:
                anchor = v.find('a')
                if anchor is None:
                    # A <dd> without a link cannot be a chapter entry.
                    continue
                name = anchor.get_text()
                zhangjie = re.search(r'\d+', name)
                if zhangjie:
                    number = int(zhangjie.group())
                    gengxin[number] = name
                    link[number] = anchor.get('href')
            # max() raises ValueError when nothing was parsed, which is
            # treated like any other failure and retried below.
            return max(gengxin), gengxin, link
        except Exception:
            now = time.strftime("%Y-%m-%d %X", time.localtime())
            logger.error("%s获取全职法师最新章节报错" % now)
            time.sleep(60)

def send(a, b):
    """Push one message to a single WeChat user through ServerChan.

    This was the first push implementation; the script now pushes to all
    subscribers instead, so this function is no longer called.

    Args:
        a: Message title (sent as the ``text`` field).
        b: Message body (sent as the ``desp`` field).

    Network errors raised by ``requests`` propagate to the caller.  A reply
    that is not the expected JSON object is reported with a printed message.
    """
    url = "https://sc.ftqq.com/****************************"
    data = {
        "text": a,
        "desp": b,
    }
    # Without a timeout a stalled connection would block forever.
    res = requests.post(url, data=data, timeout=60)
    try:
        if res.json()["code"] == 0:
            # The old success message wrongly claimed the push was re-sent.
            print("%s 发送成功" % a)
        else:
            print("%s 发送失败，重新发送" % a)
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not an object carrying a "code" field.
        print('发送出错')

def nsendAll(a, b):
    """Push a freshly crawled 逆天邪神 chapter to every subscriber.

    Args:
        a: Message title (sent as the ``text`` field).
        b: Message body (sent as the ``desp`` field).

    Network errors raised by ``requests`` (including the timeout) propagate to
    the caller.  A reply that is not the expected JSON object is reported with
    a printed message.
    """
    url = "https://pushbear.ftqq.com/sub"
    data = {
        "sendkey": "***************************",
        "text": a,
        "desp": b,
    }
    # Without a timeout a stalled connection would block the monitor forever.
    res = requests.post(url, data=data, timeout=60)
    try:
        reply = res.json()  # parse once, reuse for both the check and the dump
        if reply["code"] == 0:
            print("%s 发送成功" % a)
        else:
            print(reply)
            print("%s 发送失败，重新发送" % a)
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not an object carrying a "code" field.
        print('发送出错')

def qsendAll(a, b):
    """Push a freshly crawled 全职法师 chapter to every subscriber.

    Args:
        a: Message title (sent as the ``text`` field).
        b: Message body (sent as the ``desp`` field).

    Network errors raised by ``requests`` (including the timeout) propagate to
    the caller.  A reply that is not the expected JSON object is reported with
    a printed message.
    """
    url = "https://pushbear.ftqq.com/sub"
    data = {
        "sendkey": "*************************",
        "text": a,
        "desp": b,
    }
    # Without a timeout a stalled connection would block the monitor forever.
    res = requests.post(url, data=data, timeout=60)
    try:
        reply = res.json()  # parse once, reuse for both the check and the dump
        if reply["code"] == 0:
            print("%s 发送成功" % a)
        else:
            print(reply)
            print("%s 发送失败，重新发送" % a)
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not an object carrying a "code" field.
        print('发送出错')

class log():
    """File logger for this script.

    Messages of every level (DEBUG and above) are written to one log file
    through a ``TimedRotatingFileHandler`` that rolls over daily and keeps one
    backup.  The handler is created once, in ``__init__``; the logging methods
    only forward to it.  Earlier, every single log call rebuilt the logger and
    opened a new file handler.

    Attributes:
        logger: The underlying ``logging.Logger`` (named after this module).
        handler: The rotating file handler attached to ``logger``.
    """

    def __init__(self, filename="运行日志.log"):
        """Attach a fresh rotating file handler to the module logger.

        Args:
            filename: Path of the log file; defaults to the file name the
                script has always used.
        """
        self.logger = logging.getLogger(__name__)
        # Detach and close handlers left by an earlier instance so messages
        # are never written twice and no file handle is leaked.
        for old in list(self.logger.handlers):
            self.logger.removeHandler(old)
            old.close()

        self.handler = logging.handlers.TimedRotatingFileHandler(
            filename, encoding='utf-8', when='D', interval=1, backupCount=1)
        # Suffix appended to rotated files; same syntax as strftime.
        self.handler.suffix = "%Y-%m-%d.log"
        self.handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))

        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(self.handler)

    def info(self, message=None):
        """Log *message* at INFO level."""
        self.logger.info(message)

    def debug(self, message=None):
        """Log *message* at DEBUG level."""
        self.logger.debug(message)

    def warning(self, message=None):
        """Log *message* at WARNING level."""
        self.logger.warning(message)

    def error(self, message=None):
        """Log *message* at ERROR level."""
        self.logger.error(message)

    def critical(self, message=None):
        """Log *message* at CRITICAL level."""
        self.logger.critical(message)
logger = log()

# Progress file: line 1 holds the last pushed 逆天邪神 chapter number,
# line 2 the last pushed 全职法师 chapter number.
STATE_FILE = "最新章节.txt"
# After a long gap only this many of the newest chapters are pushed.
MAX_BACKLOG = 5


def _load_progress():
    """Return the chapter numbers stored in STATE_FILE, or [] if unusable."""
    try:
        with open(STATE_FILE, "r") as f:
            # Blank lines are skipped; int() tolerates the trailing newline.
            return [int(line) for line in f if line.strip()]
    except (OSError, ValueError):
        return []


def _save_progress(n_chapter, q_chapter):
    """Write the last pushed chapter number of both novels to STATE_FILE."""
    with open(STATE_FILE, "w") as f:
        f.write('%d\n' % n_chapter)
        f.write('%d\n' % q_chapter)


def _nitian_body(url):
    """Download one 逆天邪神 chapter page and return its text for pushing."""
    res = requests.get(url, timeout=60)
    res.encoding = "utf-8"
    soup = BeautifulSoup(res.text, 'html.parser')
    # The first and last <p> are dropped (presumably site boilerplate --
    # verify against the page).
    s = ''.join(str(p) for p in soup.find_all('p')[1:-1])
    return s.replace('<p>', ' ').replace('</p>', '\n\n')


def _quanzhi_body(url):
    """Download one 全职法师 chapter page and return its text for pushing."""
    quanzhi = requests.get(url, timeout=60)
    quanzhi.encoding = "gbk"
    soup = BeautifulSoup(quanzhi.text, "html.parser")
    content = soup.find('div', id="content")
    if content is None:
        # Fail instead of pushing the literal text "None" as a chapter.
        raise ValueError("chapter page has no content <div>")
    return str(content).replace("<p>", ' ').replace("</p>", '\n\n')


nzuixin = 1
qzuixin = 1
abc = _load_progress()
if len(abc) < 2:
    # First run, or the progress file is missing/corrupt: start from whatever
    # is newest right now so that old chapters are not pushed.
    try:
        nzuixin, title1, link1 = get_nzuixin()
        qzuixin, title2, link2 = get_qzuixin()
    except Exception:
        # Also reached when a fetcher returned None (unpacking fails); the
        # defaults of 1 are kept and the backlog cap limits the first push.
        logger.error("启动时获取最新章节失败")
    abc = [nzuixin, qzuixin]
    _save_progress(nzuixin, qzuixin)

chushi = int(abc[0])
qchushi = int(abc[1])  # last pushed chapter numbers read from the file

while True:
    try:
        nzuixin, title1, link1 = get_nzuixin()
        qzuixin, title2, link2 = get_qzuixin()

        if nzuixin > chushi:
            if nzuixin - chushi > MAX_BACKLOG:
                chushi = nzuixin - MAX_BACKLOG
            for d in range(chushi + 1, nzuixin + 1):
                if d not in link1:
                    continue  # this number is not in the chapter listing
                # Title and URL of this chapter, not the whole dicts.
                a = "逆天邪神-- {}".format(title1[d])
                try:
                    nsendAll(a, _nitian_body(link1[d]))
                    chushi = d
                    _save_progress(chushi, qchushi)
                except Exception:
                    print("爬取发送正文%s失败" % title1[d])
        else:
            now = time.strftime("%Y-%m-%d %X", time.localtime())
            print("%s 逆天邪神还没有更新,最新%s" % (now, title1.get(nzuixin)))

        if qzuixin > qchushi:
            if qzuixin - qchushi > MAX_BACKLOG:
                qchushi = qzuixin - MAX_BACKLOG
            for d in range(qchushi + 1, qzuixin + 1):
                if d not in link2:
                    continue  # this number is not in the chapter listing
                a = "全职法师--{}".format(title2[d])
                try:
                    qsendAll(a, _quanzhi_body(link2[d]))
                    # Record the chapter actually sent, not the newest one,
                    # so memory and file agree if a later chapter fails.
                    qchushi = d
                    _save_progress(chushi, qchushi)
                except Exception:
                    print("爬取发送正文%s失败" % title2[d])
        else:
            now = time.strftime("%Y-%m-%d %X", time.localtime())
            print("%s 全职法师还没有更新,最新%s" % (now, title2.get(qzuixin)))

        time.sleep(300)
    except Exception:
        # Top-level boundary of the monitor loop; KeyboardInterrupt is not
        # caught, so Ctrl-C still stops the script.
        print("报错，停止爬取2分钟")
        time.sleep(120)

18251370925 发表于 2018-12-26 20:45

azusys 发表于 2018-12-26 16:57
本人才疏学浅没看懂
这个是对比页面还是怎么样才知道有没有更新的呢？
可以修改一下网页有更新自动提醒 ...

是这样的，先获取章节目录里的最大章节数，比如现在最新章节为第8章，只要检测到目录有了第9章，就会爬取正文推送到微信

azusys 发表于 2018-12-27 09:57

18251370925 发表于 2018-12-26 20:45
是这样的，先获取章节目录最大数比如，现在最新章节为第8章，只要检测目录有了第9章就会爬取正文推送微 ...

哦这样啊谢谢解答~

sdcbj2016 发表于 2018-12-26 14:31

不错啊
厉害

涛之雨 发表于 2018-12-26 14:33

厉害！
此贴必火。先占沙发
顺便评个分

iamxsh 发表于 2018-12-26 14:44

厉害，膜拜……

windsore 发表于 2018-12-26 14:45

小说迷的最爱啊！你这个为何不干脆直接写个成品接口挂挂小说赚点流量

littlepure 发表于 2018-12-26 14:57

这贴要火，学习学习

oookim 发表于 2018-12-26 15:09

{:1_893:}这个厉害啊啊

Simonl 发表于 2018-12-26 15:10

我还以为是监控类型小说，我还想说是什么鬼呢

cnybad 发表于 2018-12-26 15:31

python 那么简洁的语言，看了你写的代码，真是~~~不过还是要鼓励下的

chenql 发表于 2018-12-26 15:35

这个想法灰常好啊！{:1_927:}

页: [1] 2 3

吾爱破解 - 52pojie.cn's Archiver

监控小说更新-自动推送