# [Python] 纯文本查看 复制代码  -- forum code-box caption ("view as plain text / copy code"); not part of the script.
import datetime
import os
import sqlite3

import numpy as np
import pandas as pd
from dateutil import relativedelta
# Directory that contains this script; the functions in this file build their
# CSV / database / output paths from it.  The name shadows the builtin dir(),
# but it is kept because the rest of the file reads it under this name.
dir = os.path.split(os.path.abspath(__file__))[0]
# Parent directory of the script directory.
p_dir = os.path.split(dir)[0]
# Markdown text accumulated for the day that is currently being exported.
txt = ""
# Tidy up the raw "微博id.csv" export.
def csv_to_weibocsv():
    """Rename the columns of ``微博id.csv`` and save it as ``_csv/weibo.csv``.

    The input is read from the script directory.  Its columns are replaced
    by the fixed 33-name list below, doubled ``<br />`` tags in the two text
    columns are collapsed to a single one, and the frame is written with a
    UTF-8 BOM.  The row index is written too (pandas default); the readers
    in this file address columns by position, so that extra leading column
    must stay.

    Raises:
        FileNotFoundError: if ``微博id.csv`` is missing.
        ValueError: if the input does not have exactly 33 columns.
    """
    columns1 = ["id","bid","正文","头条文章url","原始图片url","视频url","位置","日期","工具","点赞数","评论数","转发数","话题","@用户","日期2","是否原创","源用户id","源用户昵称","源微博id","源微博bid","源微博正文","源微博头条文章url","源微博原始图片url","源微博视频url","源微博位置","源微博日期","源微博工具","源微博点赞数","源微博评论数","源微博转发数","源微博话题","源微博@用户","日期3"]

    weibo = pd.read_csv(os.path.join(dir, "微博id.csv"))
    weibo.columns = columns1

    for col in ("正文", "源微博正文"):
        # A column that is entirely empty is read as float64 (all NaN) and has
        # no .str accessor; there is nothing to clean in that case.
        if pd.api.types.is_numeric_dtype(weibo[col]):
            continue
        weibo[col] = weibo[col].str.replace("<br /><br />", "<br />", regex=False)

    print("修正weibo.csv ... ")
    out_dir = os.path.join(dir, "_csv")
    # The output folder may not exist on a fresh checkout.
    os.makedirs(out_dir, exist_ok=True)
    weibo.to_csv(os.path.join(out_dir, "weibo.csv"), encoding='utf-8-sig')
    print("成功!\n")
# Export the comments table to CSV.  The original author recommends exporting
# with a database tool instead, as that is more efficient.
def comments_to_csv():
    """Dump every row of the ``comments`` table to ``_csv/comments.csv``.

    The database is ``weibodata.db`` in the parent of the script directory.
    Rows are written without column names from the database (the DataFrame
    gets default integer labels), with the row index and a UTF-8 BOM.

    Raises:
        sqlite3.Error: if the query fails (e.g. the table does not exist).
    """
    db_path = os.path.join(os.path.dirname(dir), "weibodata.db")
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute('select * from comments').fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    print("导出评论 comments.csv ... ")
    out_dir = os.path.join(dir, "_csv")
    os.makedirs(out_dir, exist_ok=True)
    # The numpy round-trip is kept from the original so the values written
    # to the CSV are produced the same way as before.
    pd.DataFrame(np.array(rows)).to_csv(
        os.path.join(out_dir, "comments.csv"), encoding='utf-8-sig'
    )
    print("成功!\n")
# Export weibo.csv straight from the database.  Marked by the original author
# as no longer needed.
def weibo_to_csv():
    """Dump every row of the ``weibo`` table to ``_csv/weibo.csv``.

    The database is ``weibodata.db`` in the parent of the script directory.
    Rows are written with default integer column labels, the row index and
    a UTF-8 BOM.  Note that this writes the same file as
    ``csv_to_weibocsv`` but without its column names.

    Raises:
        sqlite3.Error: if the query fails (e.g. the table does not exist).
    """
    db_path = os.path.join(os.path.dirname(dir), "weibodata.db")
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute('select * from weibo').fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    out_dir = os.path.join(dir, "_csv")
    os.makedirs(out_dir, exist_ok=True)
    # The numpy round-trip is kept from the original so the values written
    # to the CSV are produced the same way as before.
    pd.DataFrame(np.array(rows)).to_csv(
        os.path.join(out_dir, "weibo.csv"), encoding='utf-8-sig'
    )
# Render the comments of one weibo, ordered by like count, top 20 only.
def comments_print(_id_):
    """Store the Markdown for a weibo's 20 most-liked comments in ``comments_txt``.

    ``_csv/comments.csv`` is read, its columns are renamed to the fixed
    11-name list below, and the rows whose ``weibo_id`` equals ``_id_`` are
    sorted by ``点赞数`` (descending) and cut to 20.  A notice is printed when
    fewer than 20 comments are available.

    Args:
        _id_: weibo id to match against the ``weibo_id`` column.

    Side effects:
        Overwrites the module-level ``comments_txt``; returns nothing.
    """
    global comments_txt
    comments_txt = ""

    comments = pd.read_csv(os.path.join(dir, "_csv", "comments.csv"))
    comments.columns = ["id","bid","weibo_id","root_id","user_id","日期","昵称","评论url","评论","图片url","点赞数"]

    top = (
        comments.drop_duplicates()
        .query('weibo_id ==@_id_')
        .sort_values("点赞数", ascending=False)
        .head(20)
    )

    found = top["id"].notnull().sum()
    if found < 20:
        print(found,"个评论导出,数据不完整!")

    parts = []
    for row in top.itertuples():
        # Tuple positions: 0 = index, 6 = 日期, 7 = 昵称, 9 = 评论.
        # The original guard was `str(...) is not None`, which is always true;
        # the rows it was meant to skip are those without a date.
        if pd.notna(row[6]):
            parts.append(
                "> 💬 <font size=\"2\">" + str(row[7]) + ",发布于:" + str(row[6])
                + "</font>\n<font size=\"2\"> " + str(row[9]) + "</font>\n"
            )
    comments_txt = "".join(parts)
# Render one weibo as Markdown.
def weibo_print(_id_):
    """Append the Markdown for the weibo ``_id_`` to the module-level ``txt``.

    ``_csv/weibo.csv`` is read and the rows whose ``id`` equals ``_id_`` are
    rendered newest first.  Each entry consists of a heading that links to
    the post, the post text followed by ``pic_path_txt``, an optional quoted
    source post (only when column ``是否原创`` reads ``False``) followed by
    ``forwad_pic``, the repost/comment/like counters, and a collapsible note
    containing ``comments_txt``.

    Args:
        _id_: weibo id to match against the ``id`` column.

    Reads the module-level ``comments_txt``, ``pic_path_txt`` and
    ``forwad_pic``; they must have been set before calling.  Returns nothing.
    """
    global txt

    weibo_name = "微博名"
    # The pasted source had forum BBCode ("[url=...]...[/url]") inside the
    # link strings, which broke the generated Markdown links.
    detail_url = "https://m.weibo.cn/detail/"

    posts = pd.read_csv(os.path.join(dir, "_csv", "weibo.csv"))
    posts = posts.drop_duplicates()
    posts = posts.query('id == @_id_')
    posts = posts.sort_values("日期", ascending=False)

    for row in posts.itertuples():
        # Tuple positions are offset by two from the column list used when the
        # CSV was written: 0 is the itertuples index and 1 is the saved index
        # column (assumes the file was produced with the index included).
        if str(row[17]) == "False":
            forwad = (
                "\n[//@" + str(row[19]) + ":](" + detail_url
                + "{:.0f}".format(row[20]) + ")\n>> " + str(row[22]) + "\n"
            )
        else:
            forwad = ""

        entry = [
            "### [" + weibo_name + "](" + detail_url + str(row[2]) + ")\n",
            # str() on the tool column: an empty cell is NaN (a float) and
            # cannot be concatenated to a string directly.
            "<font size=\"2\">" + str(row[16]) + ",来自:" + str(row[10]) + "</font>\n",
            '> ' + str(row[4]) + pic_path_txt,
            forwad + forwad_pic,
            "\n<font size=\"2\">转发:" + str(row[13]) + ",评论:" + str(row[12])
            + ",点赞:" + str(row[11]) + "</font>\n",
            ">[!NOTE]- 评论\n" + comments_txt + "\n---- \n",
        ]
        txt = txt + "".join(entry)
# Collect the image / video embeds that belong to one weibo.
def get_pic(_id_):
    """Store the embed links for weibo ``_id_`` in ``pic_path_txt``.

    Rows of the ``bins`` table are read by position (0 id, 1 ext, 2 data,
    3 weibo_id, 4 comment, 5 path, 6 url).  For every row whose weibo_id,
    compared as text, equals ``_id_``, an ``![[img...]]`` embed is produced
    for ``.jpg`` / ``.gif`` files and an ``![[video...]]`` embed for anything
    else, using the part of the stored path that follows the first
    ``img`` / ``video``.

    Args:
        _id_: weibo id; compared as a string.

    Side effects:
        Overwrites the module-level ``pic_path_txt`` with the concatenated
        embeds plus a trailing newline (just ``"\\n"`` when nothing matches).
        Returns nothing.

    Raises:
        sqlite3.Error: if the query fails (e.g. the table does not exist).
    """
    global pic_path_txt
    pic_path_txt = ""

    wanted = str(_id_)
    embeds = []
    conn = sqlite3.connect(os.path.join(os.path.dirname(dir), "weibodata.db"))
    try:
        # Iterate the cursor instead of fetchall() + numpy + pandas so the
        # whole table is never held in memory at once.
        # NOTE(review): filtering in SQL would also avoid reading the data
        # column; that needs the real column names, which are not visible here.
        for row in conn.execute('select * from bins'):
            if str(row[3]) != wanted:
                continue
            marker = "img" if row[1] in (".jpg", ".gif") else "video"
            path_text = str(row[5]).replace("\\", "/")
            # partition keeps everything after the FIRST marker; split()[1]
            # cut the path short when the marker occurred again (for example
            # in the file name) and raised IndexError when it was absent.
            _, hit, tail = path_text.partition(marker)
            if not hit:
                print("跳过无法解析的路径:", path_text)
                continue
            embeds.append("![[" + marker + tail + "]]")
    finally:
        # Close the connection even when the query raises.
        conn.close()

    # The original called drop_duplicates() without keeping the result, so no
    # de-duplication ever took place; none is done here either.
    pic_path_txt = "".join(embeds) + "\n"
# First and last day to export (both inclusive), as YYYY-MM-DD strings.
# The loop compares them as text, which is only correct for this format.
start_date = '2022-01-01'
end_date = '2023-01-01'

# Optional step: uncomment to (re)export comments.csv from the database.
#comments_to_csv()
csv_to_weibocsv()

# weibo.csv does not change while exporting, so load it once rather than once
# per day.  The de-duplicated frame is kept (the original discarded it).
all_posts = pd.read_csv(os.path.join(dir, "_csv", "weibo.csv")).drop_duplicates()

# Write one Markdown file per day: <script dir>/<YYYY>/<MM>/<YYYY-MM-DD>.md
while start_date <= end_date:
    start_date_strp = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    next_date_strp = start_date_strp + relativedelta.relativedelta(days=1)
    next_date = next_date_strp.strftime('%Y-%m-%d')
    print(start_date)

    # Half-open interval [start_date, next_date).  The original upper bound
    # was inclusive, so a date-only value equal to next_date was exported into
    # two consecutive files.  Values carrying a time part compare the same
    # way as before.
    day_posts = all_posts.query('日期 >= @start_date & 日期 < @next_date')

    for post in day_posts.itertuples():
        # Tuple position 2 is the weibo id and 20 the source weibo id
        # (assumes the CSV layout written by csv_to_weibocsv, index included).
        print("更新... ", post[2])
        # Media of the reposted source weibo first ...
        get_pic("{:.0f}".format(post[20]))
        forwad_pic = pic_path_txt
        # ... then the media of the weibo itself.
        get_pic(str(post[2]))
        comments_print(post[2])
        weibo_print(post[2])
        forwad_pic = ""

    if txt != "":
        year, month = start_date.split("-")[:2]
        month_dir = os.path.join(dir, year, month)
        os.makedirs(month_dir, exist_ok=True)
        # The with-block closes the file even if the write fails.
        with open(os.path.join(month_dir, start_date + ".md"), 'w', encoding='utf-8') as out_file:
            out_file.write(txt)
        txt = ""

    start_date = next_date