本帖最后由 很快再相见123 于 2023-10-7 00:52 编辑
[Python] 纯文本查看 复制代码 参考官网api
https://parsel.readthedocs.io/en/latest/
import requests
# import parsel
from parsel import Selector
def main_func():
    """Fetch one gov.cn policy listing page and print its entries.

    Downloads the page, parses it with parsel and prints three lists
    extracted from the ``.news_box .list`` section: the link texts, the
    ``span.date`` texts and the link ``href`` attributes.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Mobile Safari/537.36'
    }
    url = 'https://www.gov.cn/zhengce/zuixin/home_4.htm'
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into empty lists.
    response.raise_for_status()
    response.encoding = 'utf-8'
    # response.text is already a str. The doctype declaration is stripped
    # before parsing; per the author's note this is what makes the selectors
    # below match on this page.
    html = response.text.replace('<!DOCTYPE html>', '')
    selector = Selector(html)
    print(selector)
    list_scope = '.news_box .list'
    policy = selector.css(f'{list_scope} a::text').getall()
    date = selector.css(f'{list_scope} span.date::text').getall()
    href = selector.css(f'{list_scope} a::attr(href)').getall()
    print(policy, date, href)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main_func()
将 GET 请求返回的结果转换为 str,并删除其中的 <!DOCTYPE html> 声明,再交给 Selector 解析即可。
(官网啥都有,多看看,没坏处)
|