本帖最后由 zac7 于 2019-11-13 17:25 编辑
之前转载过一篇文章,觉得写得挺好的,属于鸡汤文吧,但原文的格式让人很讨厌,我寻思就把它爬下来重新整理一下。
[Python] 纯文本查看 复制代码
#-*- coding:utf-8 -*-
# author:**Zac7**
# datetime:2019/11/13 12:29
# software: PyCharm
from lxml import etree
import requests
import csv
class Jt(object):
    """Scrape one article page and save its cleaned paragraph text to disk.

    The page at ``self.url`` is downloaded, the text nodes of every ``<p>``
    under ``div.post-description`` are concatenated, a fixed set of marker
    strings is blanked out, and the result is printed and written to
    ``saf.csv`` in the current working directory.
    """

    # Marker strings that are each replaced by a single space in the
    # scraped text (same strings, same order as the original chained
    # ``str.replace`` calls).
    _MARKERS = (
        '原文:',
        '笔记:2016-12-07',
        '笔记:2016-12-08',
        '笔记:2016-12-09',
    )

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` can block forever on a stalled connection.
    _TIMEOUT = 30

    def __init__(self):
        """Set the target URL and the request headers sent with it."""
        self.url = 'https://www.wyly.work/articles/26/'
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
        }

    def get_html(self):
        """Download the page, extract and clean its text, print and save it.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        resp = requests.get(
            url=self.url, headers=self.headers, timeout=self._TIMEOUT
        )
        # Fail loudly instead of scraping and saving an error page.
        resp.raise_for_status()

        html = etree.HTML(resp.text)
        pattern = '//div[@class="post-description"]//div//p/text()'
        content = html.xpath(pattern)

        text = ''.join(content)
        for marker in self._MARKERS:
            text = text.replace(marker, ' ')

        print(text)
        self.save_f(text)

    def save_f(self, new3):
        """Write ``new3`` to ``saf.csv``, overwriting any existing file.

        The file is written as UTF-8 explicitly: the scraped text is
        Chinese, and the platform default codec may be unable to encode it.
        ``newline=''`` keeps line endings exactly as they appear in the text.

        Args:
            new3: The cleaned text to write.
        """
        # The ``with`` block closes the file; no explicit close() is needed.
        with open('saf.csv', 'w', newline='', encoding='utf-8') as f:
            f.write(new3)

    def run(self):
        """Run the whole scrape-clean-save pipeline once."""
        self.get_html()
if __name__ == '__main__':
    # Script entry point: build the scraper and run it once.
    Jt().run()
这是网站上的WEB界面
|