本帖最后由 KIMG 于 2015-10-14 18:07 编辑
增加了poco论坛活动的抓取------
发一张我运行的截图
背景如下:
本人是位屌丝摄影师,平常想多拍拍美女啥的,怎么办呢???
-----------------------------------------------------------------------------------
各个论坛都会有免费的摄影活动,不过有时候关注晚了就会没有名额了,于是我写了这款小工具
每30s监控是否有新活动,有的话发邮件到qq邮箱,然后qq邮箱是绑定的微信,所以,如果有活动,我可以第一时间参与
我抓的是蜂鸟网的,大家如果知道哪些网站会经常发布免费的活动的话,麻烦提供给我~~谢谢
代码如下
[Python] 纯文本查看 复制代码
# coding: utf-8
import smtplib
from email.mime.text import MIMEText
import requests
from lxml import etree
import os
import time
import sys
# Python 2 only: switch the interpreter-wide default codec to UTF-8 so that
# implicit byte-string/unicode mixing (e.g. concatenating a UTF-8 literal with
# text returned by lxml) does not raise UnicodeDecodeError.
# `reload` is not a builtin on Python 3 and the default codec there is already
# UTF-8, so the hack is skipped instead of crashing with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - restores sys.setdefaultencoding, removed at startup
    sys.setdefaultencoding('utf-8')
class mailhelper(object):
    """Send plain-text notification mails through an SMTP account.

    Args:
        mail_host: SMTP server to connect to.
        mail_user: Login name; also used as the local part of the sender
            address.
        mail_pass: Login password.
        mail_postfix: Domain part of the sender address.
    """

    # NOTE(security): the default user name and password below are account
    # credentials embedded in the source. They are kept only so existing
    # callers of ``mailhelper()`` keep working; prefer passing your own
    # account settings explicitly and do not publish real passwords.
    def __init__(self, mail_host="smtp.sina.com", mail_user="fasong_1_12",
                 mail_pass="yijiayidengyu2", mail_postfix="sina.com"):
        self.mail_host = mail_host        # SMTP server
        self.mail_user = mail_user        # login name
        self.mail_pass = mail_pass        # login password
        self.mail_postfix = mail_postfix  # domain of the sender address

    def send_mail(self, to_list, sub, content):
        """Send one UTF-8 plain-text mail.

        Args:
            to_list: List of recipient addresses.
            sub: Subject line.
            content: Message body.

        Returns:
            True if the message was handed to the SMTP server, False if
            connecting, logging in or sending raised an exception (the error
            text is printed).
        """
        me = "huodonghelper" + "<" + self.mail_user + "@" + self.mail_postfix + ">"
        msg = MIMEText(content, _subtype='plain', _charset='utf-8')
        msg['Subject'] = sub
        msg['From'] = me
        # Address lists in mail headers are comma-separated (RFC 5322);
        # a semicolon is not a valid separator.
        msg['To'] = ", ".join(to_list)

        server = None
        try:
            server = smtplib.SMTP()
            server.connect(self.mail_host)
            server.login(self.mail_user, self.mail_pass)
            server.sendmail(me, to_list, msg.as_string())
            return True
        except Exception as e:
            # Best effort: report the failure to the caller instead of raising.
            print(str(e))
            return False
        finally:
            # Release the socket on both the success and the failure path.
            if server is not None:
                server.close()
class huodonghelper(object):
    """Scrape the first listed event of each monitored site.

    Also keeps a per-site text file of event descriptions that were already
    seen, so the caller can tell whether a scraped description is new.
    """

    # Seconds to wait for an HTTP response. Without a timeout a stalled
    # server would block the caller indefinitely.
    request_timeout = 10

    def __init__(self):
        # Listing pages to scrape (fengniao event list, POCO event feed).
        self.url1 = 'http://huodong.fengniao.com/apply-list-2-0-0-0-.html'
        self.url2 = 'http://event.poco.cn/module/event_get_info_data_v2.php?location_id=101003001&category=2&search_time_type=future_all&p=1&get_left_menu=1&list_index=1&show_type=image_text_list&show_type_index=0&first_category=2'

    def _fetch_tree(self, url, as_text=False):
        """Request ``url`` and return the parsed HTML tree, or None on failure.

        ``as_text`` selects the decoded body (``.text``) instead of the raw
        bytes (``.content``) as parser input.
        """
        try:
            response = requests.post(url, timeout=self.request_timeout)
            html = response.text if as_text else response.content
            return etree.HTML(html)
        except (requests.RequestException, etree.LxmlError, ValueError) as e:
            # Network trouble or an unparsable body: report it and let the
            # caller skip this round.
            print(str(e))
            return None

    def fengniao(self):
        """Return a description of the first event on the fengniao list.

        Returns:
            A multi-line text (URL, title, time, place, format, remaining
            slots, plus a hint line while sign-up is open), or ``''`` when the
            page could not be fetched or did not have the expected layout.
        """
        selector = self._fetch_tree(self.url1)
        if selector is None:
            return ''

        item = '/html/body/div[3]/div[1]/ul/li[1]/div/div'
        try:
            title = selector.xpath(item + '/h4/a[2]/text()')[0]       # event title
            hdtime = selector.xpath(item + '/p[1]/text()')[0]         # event time
            hddd = selector.xpath(item + '/p[2]/text()')[0]           # event place
            hdfs = selector.xpath(item + '/p[3]/span/text()')[0]      # event format
            syme = selector.xpath(item + '/h4/span/text()')[0]        # remaining slots
            href = selector.xpath(item + '/h4/a[2]/@href')[0]
        except IndexError:
            # An expected node is missing; nothing usable was scraped.
            print(u'出错了')
            return ''

        href = 'http://huodong.fengniao.com' + href
        sendtext = '活动网址:'+ href + '\n' +'标题:'+ title + '\n' +'活动时间:' + hdtime +'\n' + '活动地点:' + hddd +'\n'+ '活动方式:' + hdfs +'\n'+ '剩余人数:' + syme

        if syme == u'报名进行中':
            try:
                qr = selector.xpath(item + '/p[4]/span[2]/span/text()')[0]
                sy = selector.xpath(item + '/p[4]/span[2]/strong/text()')[0]
            except IndexError:
                # The hint is optional: keep the basic description.
                print(u'出错了')
            else:
                ts = '\n' + u'确认' + qr + u'剩余' + sy
                sendtext = sendtext + '\n' + '提示:' + ts
        return sendtext

    def poco(self):
        """Return a description of the first event in the POCO feed.

        Returns:
            A multi-line text (title, introduction, category, time, fee,
            number of sign-ups, address), or ``''`` when the feed could not be
            fetched or did not have the expected layout.
        """
        selector = self._fetch_tree(self.url2, as_text=True)
        if selector is None:
            return ''

        item = '/html/body/div/ul/li[starts-with(@id,"")]/div[2]'
        try:
            title = selector.xpath(item + '/h3/a/text()')[0]
            href = selector.xpath(item + '/h3/a/@href')[0]
            # NOTE(review): the link is built but never added to the message,
            # unlike fengniao(); confirm whether it should be included.
            href = 'http://event.poco.cn/event_browse.php?' + href[-14:]
            hdjs = selector.xpath(item + '/p[2]/text()')[0]
            hdlb = selector.xpath(item + '/table/tr[1]/td[1]/p/text()')[0]
            hdsj = selector.xpath(item + '/table/tr[1]/td[2]/p/text()')[0]
            hdfy = selector.xpath(item + '/table/tr[1]/td[3]/p/text()')[0]
            bmrs = selector.xpath(item + '/table/tr[2]/td[1]/p/text()')[0]
            hddz = selector.xpath(item + '/table/tr[2]/td[3]/p/span[2]/text()')[0]
        except IndexError:
            # An expected node is missing; nothing usable was scraped.
            print(u'出错了')
            return ''

        sendtext1 = '活动标题:'+title+'\n'+'活动介绍:'+hdjs+'\n'+'活动类别:'+hdlb+'\n'+'活动时间:'+hdsj+'\n'+'活动费用:'+hdfy+'\n'+'报名人数:'+bmrs+'\n'+'活动地址:'+hddz
        return sendtext1

    def tosave(self, text, wjm):
        """Append ``text`` plus a newline to the file ``<wjm>.txt``."""
        with open(wjm + '.txt', 'a') as f:
            f.write(text + '\n')

    def tocheck(self, data, wjm):
        """Tell whether ``data`` is new, i.e. not yet stored in ``<wjm>.txt``.

        Returns:
            False for empty/None ``data`` (a failed scrape is never "new") and
            for data already present in the file; True otherwise, including
            when the file does not exist yet.
        """
        if not data:
            return False
        path = wjm + '.txt'
        if not os.path.exists(path):
            return True
        with open(path, 'r') as f:
            seen = f.read()
        return data + '\n' not in seen
def _notify_if_new(helper, fetch, subject, wjm, mailto_list):
    """Scrape one site and mail its first event if it was not seen before.

    Args:
        helper: The ``huodonghelper`` used for the seen-check and for saving.
        fetch: Zero-argument callable returning the event description.
        subject: Mail subject to use for this site.
        wjm: Base name of the file that records seen events for this site.
        mailto_list: Recipient addresses.
    """
    content = fetch()
    if not helper.tocheck(content, wjm):
        print(u'pass')
        return
    if mailhelper().send_mail(mailto_list, subject, content):
        print(u"发送成功")
    else:
        print(u"发送失败")
    # NOTE(review): the event is recorded as seen even when sending failed,
    # so a failed mail is not retried; this matches the original behaviour.
    helper.tosave(content, wjm)
    print(content)


if __name__ == '__main__':
    mailto_list = ['']  # fill in the address(es) that should receive the mails
    helper = huodonghelper()
    sites = (
        (helper.fengniao, u"蜂鸟有活动啦", 'fengniao'),
        (helper.poco, u"POCO有活动啦", 'poco'),
    )
    while True:
        for fetch, subject, wjm in sites:
            try:
                _notify_if_new(helper, fetch, subject, wjm, mailto_list)
            except Exception as e:
                # Top-level boundary: one failing site (network error, layout
                # change, ...) must not stop the monitor or the other site.
                print(str(e))
        time.sleep(30)
|