# [Python] plain-text view / copy code  (forum code-box header left over from the paste)
def askURL(url, timeout=10):
    """Download *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: address of the page to fetch.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot block the script forever.

    Returns:
        str: the response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 "
    }
    response = requests.get(url, headers=head, timeout=timeout)
    return response.content.decode("utf-8")
def getLink():  # Get the newest link
    """Return the link text extracted from the first post on the category page.

    Downloads the category listing, takes the first
    ``<a class="media-content" target="_blank">`` element and applies the
    ``link`` regex (defined elsewhere in this file) to that element's HTML.

    Returns:
        str: every regex match converted with ``str`` and concatenated; an
        empty string when the regex matches nothing. When no anchor is found
        the regex is applied to the text ``"None"``.
    """
    html = askURL("https://www.pmtown.com/archives/category/%e6%97%a9%e6%8a%a5")
    soup = BeautifulSoup(html, "html.parser")
    anchor = soup.find("a", class_="media-content", target="_blank")
    matches = re.findall(link, str(anchor))
    # Flatten the list of matches into one plain string.
    return ''.join(str(match) for match in matches)
# ---------------------------------------------爬取早报内容-------------------------------------------------#
def getNews(news_url):
    """Download a report page and return its cleaned-up text.

    The ``<div class="post-content">`` element is stringified, the ``news``
    regex (defined elsewhere in this file) is applied to it and the first
    match is cleaned: ``<br/>`` tags and every ``/`` are removed, the block
    starting at ``【融资收购`` up to the next ``【`` is dropped, and the
    ``【泡面头条】`` heading is renamed to ``【洛七早报】``.

    Args:
        news_url: address of the report page.

    Returns:
        list: a one-element list holding the cleaned text with surrounding
        whitespace stripped.

    Raises:
        IndexError: if the ``news`` regex finds no match in the page.
    """
    page = askURL(news_url)
    soup = BeautifulSoup(page, "html.parser")
    body = str(soup.find("div", class_="post-content"))
    report = re.findall(news, body)[0]
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    report = re.sub(r"<br(\s+)?/>(\s+)?", "", report)  # strip <br/> tags
    report = report.replace("/", "")
    report = re.sub(r'【融资收购.*?【', '【', report)
    report = report.replace("【泡面头条】", "【洛七早报】")
    return [report.strip()]
# -----------------------------------------------text内容-------------------------------------------------------
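# Fetch the content, split it into sections first, then send each section.
# Patterns used by the section functions below:
# 洛七早报 【洛.*?(?=【)
# 国内头条 【国.*?(?=【)
# 海外头条 【海.*?(?=【)
# 体育竞技 【体.*?(?=【)
# 财经新闻 【财经新闻】.*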
def luoqi(content):
    """Extract the ``【洛…`` section and put each numbered item on its own line.

    Args:
        content: report text; any non-string value (for example the list
            returned by ``getNews``) is converted with ``str`` first.

    Returns:
        str: every span running from ``【洛`` up to (not including) the next
        ``【``, concatenated, with ``。`` removed and a newline inserted
        before each item marker such as ``1、`` or ``12、``. Empty string
        when the section is not found. Matching does not cross line breaks.
    """
    sections = re.findall(r'【洛.*?(?=【)', str(content))
    text = ''.join(sections)
    # Break before the whole item number so that "11、" is not split into
    # "1" + "\n1、". This runs before "。" is removed so a sentence ending
    # in a digit ("…13。2、") does not merge with the next item number.
    text = re.sub(r'(?<![0-9])([1-9][0-9]*)、', r'\n\1、', text)
    return text.replace("。", "")
def guonei(content):
    """Extract the ``【国…`` section and put each numbered item on its own line.

    Args:
        content: report text; any non-string value (for example the list
            returned by ``getNews``) is converted with ``str`` first.

    Returns:
        str: every span running from ``【国`` up to (not including) the next
        ``【``, concatenated, with ``。`` removed and a newline inserted
        before each item marker such as ``1、`` or ``12、``. Empty string
        when the section is not found. Matching does not cross line breaks.
    """
    sections = re.findall(r'【国.*?(?=【)', str(content))
    text = ''.join(sections)
    # Break before the whole item number so that "11、" is not split into
    # "1" + "\n1、". This runs before "。" is removed so a sentence ending
    # in a digit ("…13。2、") does not merge with the next item number.
    text = re.sub(r'(?<![0-9])([1-9][0-9]*)、', r'\n\1、', text)
    return text.replace("。", "")
def haiwai(content):
    """Extract the ``【海…`` section and put each numbered item on its own line.

    Args:
        content: report text; any non-string value (for example the list
            returned by ``getNews``) is converted with ``str`` first.

    Returns:
        str: every span running from ``【海`` up to (not including) the next
        ``【``, concatenated, with ``。`` removed and a newline inserted
        before each item marker such as ``1、`` or ``12、``. Empty string
        when the section is not found. Matching does not cross line breaks.
    """
    sections = re.findall(r'【海.*?(?=【)', str(content))
    text = ''.join(sections)
    # Break before the whole item number so that "11、" is not split into
    # "1" + "\n1、". This runs before "。" is removed so a sentence ending
    # in a digit ("…13。2、") does not merge with the next item number.
    text = re.sub(r'(?<![0-9])([1-9][0-9]*)、', r'\n\1、', text)
    return text.replace("。", "")
def tiyu(content):
    """Extract the ``【体…`` section and put each numbered item on its own line.

    Args:
        content: report text; any non-string value (for example the list
            returned by ``getNews``) is converted with ``str`` first.

    Returns:
        str: every span running from ``【体`` up to (not including) the next
        ``【``, concatenated, with ``。`` removed and a newline inserted
        before each item marker such as ``1、`` or ``12、``. Empty string
        when the section is not found. Matching does not cross line breaks.
    """
    sections = re.findall(r'【体.*?(?=【)', str(content))
    text = ''.join(sections)
    # Break before the whole item number so that "11、" is not split into
    # "1" + "\n1、". This runs before "。" is removed so a sentence ending
    # in a digit ("…13。2、") does not merge with the next item number.
    text = re.sub(r'(?<![0-9])([1-9][0-9]*)、', r'\n\1、', text)
    return text.replace("。", "")
def caijing(content):
    """Extract the ``【财经新闻】`` section and put each numbered item on its own line.

    This section is matched up to the end of the line rather than up to the
    next ``【``.

    Args:
        content: report text; any non-string value (for example the list
            returned by ``getNews``) is converted with ``str`` first.

    Returns:
        str: the text from ``【财经新闻】`` to the end of its line, with ``。``
        removed, a newline inserted before each item marker such as ``1、``
        or ``12、``, and any ``']`` removed. Empty string when the section
        is not found.
    """
    sections = re.findall(r'【财经新闻】.*', str(content))
    text = ''.join(sections)
    # Break before the whole item number so that "11、" is not split into
    # "1" + "\n1、". This runs before "。" is removed so a sentence ending
    # in a digit ("…13。2、") does not merge with the next item number.
    text = re.sub(r'(?<![0-9])([1-9][0-9]*)、', r'\n\1、', text)
    text = text.replace("。", "")
    # str() of a list ends with "']"; drop that tail from the captured text.
    return text.replace("']", "")
# ---------------------------------------------- 推送 ------------------------------------------------ #
def wx_push(newsdata, timeout=10):  # WeChat Work push; this is the one in use
    """Send *newsdata* through the WeChat Work text-push endpoint.

    Args:
        newsdata: message text to push.
        timeout: seconds passed to ``requests.get`` so a stalled endpoint
            cannot block the script forever.

    Returns:
        The ``requests`` response object of the call.
    """
    data = {
        "corpid": "",  # corp ID
        "corpsecret": "",  # application credential secret
        "agentid": "",  # application ID
        "text": newsdata  # content to push; HTML supported per the original note
    }
    wxtalk = 'https://api.htm.fun/api/Wechat/text/'
    # NOTE(review): the fields are sent as a request body on a GET; confirm
    # the endpoint expects that rather than query parameters.
    response = requests.get(wxtalk, data=data, timeout=timeout)
    return response
def serverPush(data, timeout=10):  # ServerChan push; not used
    """POST *data* as the body of a ServerChan message titled "推送".

    Args:
        data: message body, sent as the ``desp`` field.
        timeout: seconds passed to ``requests.post`` so a stalled endpoint
            cannot block the script forever.

    Returns:
        The ``requests`` response object of the call.
    """
    payload = {
        "title": "推送",
        "desp": data
    }
    # The URL contains a placeholder where the personal key must be filled in.
    wx_tui = "https://sctapi.ftqq.com/填写你的key.send"
    return requests.post(wx_tui, data=payload, timeout=timeout)
def qmsgPush(data, timeout=10):  # Qmsg push; not used
    """POST *data* to the Qmsg send endpoint.

    Args:
        data: message text, sent as the ``msg`` field.
        timeout: seconds passed to ``requests.post`` so a stalled endpoint
            cannot block the script forever.

    Returns:
        The ``requests`` response object of the call.
    """
    payload = {
        "msg": data
    }
    # The URL contains a placeholder where the personal key must be filled in.
    qmsg = "https://qmsg.zendee.cn/send/填写你的key"
    return requests.post(qmsg, data=payload, timeout=timeout)
def main(arg1, arg2):  # Entry function
    """Fetch the newest report and push each section through ``wx_push``.

    ``arg1`` and ``arg2`` are accepted but not used by this function.
    """
    report = getNews(getLink())
    # WeChat push: one message per section, in a fixed order.
    for section in (luoqi, guonei, haiwai, tiyu, caijing):
        wx_push(section(report))