# [Python] View plain text / Copy code  (web-page header left over from copying; kept as a comment so the file parses)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
from datetime import datetime
# Load the price data; the code below reads its 'open' and 'close' columns.
data = pd.read_csv('300565.csv', low_memory=False)

# Pre-processing: treat +/-inf as missing, then drop every row that contains
# a missing value in any column.
data = data.replace([np.inf, -np.inf], np.nan)
data = data.dropna()
# dropna() leaves gaps in the integer index; renumber the rows 0..n-1 so that
# integer labels and row positions agree for any lookups that follow.
data = data.reset_index(drop=True)

# Features: every column except 'close'.
# NOTE(review): X still contains 'open' and any other same-row columns, while
# the target is derived from close - open of that same row; check whether
# this leaks information the model would not have at prediction time.
# NOTE(review): the estimators need numeric features; confirm the CSV has no
# text/date columns left in X.
X = data.drop(['close'], axis=1)
# Target: True when the row closed above its open.
y = (data['close'] - data['open']) > 0

# Split in row order (no shuffling): the first 80% of rows train the models
# and the last 20% form the test set.  The rest of the script addresses test
# rows as data.index[len(X_train):], which is only valid when the test set is
# the contiguous tail of `data`.  random_state is omitted because it has no
# effect when shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)
# Fit three classifiers on the training split and print each one's metrics
# on the test split.
models = [
    LogisticRegression(),
    RandomForestClassifier(),
    GradientBoostingClassifier(),
]
logreg, rf, gb = models
for model in models:
    model.fit(X_train, y_train)
    # y_pred is rebound on every pass, so once the loop ends it holds the
    # predictions of the last model in `models`.
    y_pred = model.predict(X_test)

    # Evaluation metrics for the current model.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # One line per metric, followed by a blank separator line.
    print(
        f'Model: {type(model).__name__}',
        f'Accuracy: {accuracy}',
        f'Precision: {precision}',
        f'Recall: {recall}',
        f'F1 Score: {f1}',
        '',
        sep='\n',
    )
# Simulate a one-lot, long-only strategy over the test rows and write a
# per-row report to report.csv.
#
# y_pred is whatever prediction array the preceding code left in scope.
# NOTE(review): this assumes y_pred[i] belongs to the i-th row after the
# first len(X_train) rows of `data`, i.e. the test set is the unshuffled
# tail of `data` -- confirm against the split used above.
buy_sell = []  # one (index label, action) per test row; 1 = buy, 0 = sell or no-op
asset = 10000  # cash balance; starts at 10,000
initial_asset = asset  # kept to express results as a ratio of the starting cash
is_holding = False  # True while the 100-share position is open
lot_size = 100  # shares bought or sold per trade

# Address the test rows by POSITION.  Indexing a Series with a bare integer
# (data['close'][k]) is a label lookup, which can disagree with the
# positional data.index[k] once dropna() has removed rows.
test_start = len(X_train)
test_stop = test_start + len(y_pred)
test_labels = data.index[test_start:test_stop]
test_close = data['close'].iloc[test_start:test_stop].tolist()

# (cash + market value of the open position) / initial cash, after each row.
asset_curve = []
for label, price, predicted_up in zip(test_labels, test_close, y_pred):
    if predicted_up and not is_holding:
        # Predicted rise with no position: buy one lot at this row's close.
        # NOTE(review): cash is not checked and can go negative.
        buy_sell.append((label, 1))
        asset -= lot_size * price
        is_holding = True
    else:
        if is_holding and not predicted_up:
            # Predicted fall while holding: sell the lot at this row's close.
            asset += lot_size * price
            is_holding = False
        # Sells and no-ops (already holding / nothing to sell) are recorded as 0.
        buy_sell.append((label, 0))
    position_value = lot_size * price if is_holding else 0
    asset_curve.append((asset + position_value) / initial_asset)

# Report columns: row label, asset ratio after the row, (label, action) tuple,
# close price, and the fixed lot size.
report = pd.DataFrame({
    'time': test_labels,
    'asset': asset_curve,
    'action': buy_sell,
    'price': test_close,
    'quantity': [lot_size] * len(buy_sell),
})
report.to_csv('report.csv', index=False)  # written without the index column
# Plot the close price and annotate the buy / sell points on it.
plt.figure(figsize=(10, 6))
plt.plot(data.index, data['close'], label='close')  # close price as the reference curve

# buy_sell records 0 for sells AND for no-op rows, so marking every 0 would
# draw a "sell" arrow on each idle row.  Real sells are recovered by replaying
# the holding state: a sell is a 0-row where a position is open and the
# prediction for that row is "down".
holding = False
for (label, action), predicted_up in zip(buy_sell, y_pred):
    # Look the price up by the label stored with the action, so the arrow
    # sits on the same row the action was recorded for.
    price = data['close'].loc[label]
    if action == 1:
        # Buy point: red up-arrow above the curve.
        holding = True
        plt.annotate('↑', (label, price), textcoords="offset points", xytext=(0, 10),
                     ha='center', color='red')
    elif holding and not predicted_up:
        # Sell point: green down-arrow below the curve.
        holding = False
        plt.annotate('↓', (label, price), textcoords="offset points",
                     xytext=(0, -10), ha='center', color='green')
plt.legend()
plt.show()