根据这位朋友的帖子学到的:https://www.52pojie.cn/forum.php?mod=viewthread&tid=1582275&extra=page%3D1%26filter%3Dauthor%26orderby%3Ddateline
直接上代码吧:
[Python] 纯文本查看 复制代码
from xml import etree
import time,json
import requests
from lxml import etree
def dingtalk_warning(message):
    """Send *message* as a plain-text DingTalk robot notification.

    Args:
        message: Text placed in the ``text.content`` field of the payload.

    Returns:
        True when the response JSON reports ``errcode == 0``; False
        otherwise, including when the response body is not a JSON object.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Placeholder token: replace it with your own robot's access_token.
    # Another channel (e.g. a WeChat official account) could be used instead.
    webhook = "https://oapi.dingtalk.com/robot/send?access_token=你自己的钉钉"
    headers = {'Content-Type': 'application/json'}
    payload = {
        "msgtype": "text",
        "text": {
            "content": message,
        },
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(
        url=webhook, data=json.dumps(payload), headers=headers, timeout=10
    )
    print(response.content.decode())
    try:
        result = response.json()
    except ValueError:
        # The body was not valid JSON, so success cannot be confirmed.
        return False
    return isinstance(result, dict) and result.get("errcode") == 0
def heat():
    """Fetch the hot-search list and push it to DingTalk in rank order.

    Downloads the page at ``url``, reads the ``<li>`` items under the element
    whose id is ``hotsearch-content-wrapper``, prints each title, and sends
    the ranked titles through ``dingtalk_warning``. When an item's direct
    ``<span>`` child carries text (the "hot" marker, according to the
    original author's note), that text is appended after the title.

    Raises:
        requests.RequestException: If the page request fails or times out.
    """
    # NOTE(review): this is a placeholder, not a real URL; replace it with
    # the address of the hot-search page before running.
    url = "百度热搜"
    header = {
        "Referer": "https://www.baidu.com/",
        "sec-ch-ua-platform": "Windows",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36",
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    page_html = requests.get(url, headers=header, timeout=10).text
    tree = etree.HTML(page_html)
    items = tree.xpath('//*[@id="hotsearch-content-wrapper"]/li ')
    # Pitfall: the items do not come back in rank order (1, 4, ... rather
    # than 1, 2, 3, ...), so each position is mapped to its rank explicitly.
    display_ranks = [1, 4, 2, 5, 3, 6]
    ranked_lines = []
    for item, rank in zip(items, display_ranks):
        titles = item.xpath('./a/span[2]/text()')
        if not titles:
            # Skip an entry without a title instead of failing on [0].
            continue
        title = titles[0]
        hot_marker = item.xpath('./span/text()')
        print(title)
        line = " " + str(rank) + " " + title
        if hot_marker:
            # Only some entries carry the marker; append it when present.
            line += " " + hot_marker[0]
        ranked_lines.append((rank, line + "\n"))
    # Put the entries back into rank order, comparing ranks as integers.
    ranked_lines.sort(key=lambda pair: pair[0])
    body = "".join(line for _, line in ranked_lines)
    dingtalk_warning("报警平台\n%s" % body)
# Script entry point: run the scraper only when this file is executed directly.
if __name__ == "__main__":
    heat()
顺便请教大家几个问题:
上面的代码有个问题我自己解决不了,
就是有的热搜条目带了那个"热"字,
不知道怎么把它放进结果里——有的条目带,有的不带。
以上都是根据上面那位童鞋的帖子学到的。