【Python】模拟预测2022卡塔尔世界杯结果

cococola · 发表于 2022-11-25 22:00

一、前言

在逛github的时候看到一个大佬发布了模拟2022世界杯的程序，在我试验了以后发现最终模拟的结果是巴西会夺冠。
这应该符合很多人的想法猜测吧，哈哈哈哈。
声明：本贴仅供学习交流使用，如存在侵权行为请告知我立即删帖。
github原作者【QuarterTime】 --反正我是在他那里找到的，至于他是不是原作者我也就不清楚了。

如有违规，请吧主删帖，不要留情。

二、代码测试截图

三、程序代码
本程序使用的配套数据我已经打包好放在了最后面，代码由两部分构成，先上第一部分保存数据的代码

[Python] 纯文本查看 复制代码

# -*- coding:utf-8 -*-
# Wan Jiongming @Copyright 2022

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

with open("country_name.txt", "r", encoding="utf-8") as f:
    info = f.readlines()
    info = list(map(lambda x:x.strip(), info))

English_name = info[:32]
Chinese_name = info[32:]

country_name = {}

for each in zip(English_name, Chinese_name):
    country_name[each[0]] = each[1]

def cv2AddChineseText(img, text, position, textColor=(0, 255, 0), textSize=30):
    if (isinstance(img, np.ndarray)):  # 判断是否OpenCV图片类型
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    # 创建一个可以在给定图像上绘图的对象
    draw = ImageDraw.Draw(img)
    # 字体的格式
    fontStyle = ImageFont.truetype(
        "simsun.ttc", textSize, encoding="utf-8")
    # 绘制文本
    draw.text(position, text, textColor, font=fontStyle)
    # 转换回OpenCV格式
    return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)

def save_res(a, b, winner, prob, save_name):
    img = np.zeros((520, 850, 3), np.uint8)

    a_img_path = "./world_cup/" + country_name[a] + ".png"
    a_img = cv2.imdecode(np.fromfile(a_img_path, dtype = np.uint8), -1)

    a_img_h, a_img_w, _ = a_img.shape

    b_img_path = "./world_cup/" + country_name[b] + ".png"
    b_img = cv2.imdecode(np.fromfile(b_img_path, dtype = np.uint8), -1)

    b_img_h, b_img_w, _ = b_img.shape

    winner_img_path = "./world_cup/" + country_name[winner] + ".png"
    winner_img = cv2.imdecode(np.fromfile(winner_img_path, dtype = np.uint8), -1)

    winner_img_h, winner_img_w, _ = winner_img.shape

    img[10: 10 + a_img_h, 10: 10 + a_img_w] = a_img[:, :, :3]

    img[110 + a_img_h: 110 + a_img_h + b_img_h, 10: 10 + b_img_w] = b_img[:,:,:3]


    img = cv2AddChineseText(img, country_name[a] + "(" + a + ")", (10, 20 + a_img_h),(255, 255, 255), 30)

    img = cv2AddChineseText(img, country_name[b] + "(" + b + ")", (10, 20 + 100 + a_img_h + b_img_h), (255, 255, 255), 30)

    point1 = (10 + a_img_w, 10 + (a_img_h) // 2)
    point2 = (10 + a_img_w + 100, 10 + (a_img_h) // 2)

    cv2.line(img, point1, point2, (255, 255, 255), 10)

    point3 = (10 + a_img_w, 10 + 100 + a_img_h + (b_img_h) // 2)
    point4 = (10 + b_img_w + 100, 10 + 100 + a_img_h + (b_img_h) // 2)

    cv2.line(img, point3, point4, (255, 255, 255), 10)

    cv2.line(img, point2, point4, (255, 255, 255), 10)


    point5 = (10 + a_img_w + 100, 10 + 50 + a_img_h)
    point6 = (10 + a_img_w + 300, 10 + 50 + a_img_h)
    cv2.line(img, point5, point6, (255, 255, 255), 10)

    img = cv2AddChineseText(img, "胜率：{}".format(prob), (10 + a_img_w + 100 + 20, 10 + 50 + a_img_h - 40),(255, 255, 255), 30)

    img[10 + 50 + a_img_h - winner_img_h // 2: 10 + 50 + a_img_h + winner_img_h // 2, 10 + a_img_w + 300 : 10 + a_img_w + 300 + winner_img_w] = winner_img[:,:,:3]

    img = cv2AddChineseText(img, country_name[winner] + "(" + winner + ")", (10 + a_img_w + 300, 10 + 50 + a_img_h + winner_img_h // 2 + 10), (255, 255, 255), 30)

    cv2.imwrite(save_name, img)

def save_res_draw(a, b, winner, prob, save_name):
    img = np.zeros((520, 850, 3), np.uint8)

    a_img_path = "./world_cup/" + country_name[a] + ".png"
    a_img = cv2.imdecode(np.fromfile(a_img_path, dtype = np.uint8), -1)

    a_img_h, a_img_w, _ = a_img.shape

    b_img_path = "./world_cup/" + country_name[b] + ".png"
    b_img = cv2.imdecode(np.fromfile(b_img_path, dtype = np.uint8), -1)

    b_img_h, b_img_w, _ = b_img.shape

    winner_img_path = "./world_cup/" + country_name[winner] + ".png"
    winner_img = cv2.imdecode(np.fromfile(winner_img_path, dtype = np.uint8), -1)

    winner_img_h, winner_img_w, _ = winner_img.shape

    img[10: 10 + a_img_h, 10: 10 + a_img_w] = a_img[:, :, :3]

    img[110 + a_img_h: 110 + a_img_h + b_img_h, 10: 10 + b_img_w] = b_img[:,:,:3]


    img = cv2AddChineseText(img, country_name[a] + "(" + a + ")", (10, 20 + a_img_h),(255, 255, 255), 30)

    img = cv2AddChineseText(img, country_name[b] + "(" + b + ")", (10, 20 + 100 + a_img_h + b_img_h), (255, 255, 255), 30)

    point1 = (10 + a_img_w, 10 + (a_img_h) // 2)
    point2 = (10 + a_img_w + 100, 10 + (a_img_h) // 2)

    cv2.line(img, point1, point2, (255, 255, 255), 10)

    point3 = (10 + a_img_w, 10 + 100 + a_img_h + (b_img_h) // 2)
    point4 = (10 + b_img_w + 100, 10 + 100 + a_img_h + (b_img_h) // 2)

    cv2.line(img, point3, point4, (255, 255, 255), 10)

    cv2.line(img, point2, point4, (255, 255, 255), 10)


    point5 = (10 + a_img_w + 100, 10 + 50 + a_img_h)
    point6 = (10 + a_img_w + 300, 10 + 50 + a_img_h)
    cv2.line(img, point5, point6, (255, 255, 255), 10)

    img = cv2AddChineseText(img, "胜率：{}".format(prob), (10 + a_img_w + 100 + 20, 10 + 50 + a_img_h - 40),(255, 255, 255), 30)

    # img[10 + 50 + a_img_h - winner_img_h // 2: 10 + 50 + a_img_h + winner_img_h // 2, 10 + a_img_w + 300 : 10 + a_img_w + 300 + winner_img_w] = winner_img[:,:,:3]

    # img = cv2AddChineseText(img, country_name[winner] + "(" + winner + ")", (10 + a_img_w + 300, 10 + 50 + a_img_h + winner_img_h // 2 + 10), (255, 255, 255), 30)

    cv2.imwrite(save_name, img)

if __name__ == "__main__":
    save_res_draw("Switzerland", "Cameroon", "Switzerland", 0.62, "tmp.png")

这里是第二部分代码，程序运行的主代码

[Python] 纯文本查看 复制代码

# -*- coding:utf-8 -*-
# Wan Jiongming @Copyright 2022

import numpy as np
import pandas as pd
from operator import itemgetter
from save_res import save_res, save_res_draw
import time

df = pd.read_csv("./kaggle/results.csv")
df["date"] = pd.to_datetime(df["date"])
df.dropna(inplace=True)
df = df[(df["date"] >= "2018-8-1")].reset_index(drop=True)

rank = pd.read_csv("./kaggle/fifa_ranking-2022-10-06.csv")
rank["rank_date"] = pd.to_datetime(rank["rank_date"])
rank = rank[(rank["rank_date"] >= "2018-8-1")].reset_index(drop=True)
rank["country_full"] = rank["country_full"].str.replace("IR Iran", "Iran").str.replace("Korea Republic", "South Korea").str.replace("USA", "United States")
rank = rank.set_index(['rank_date']).groupby(['country_full'], group_keys=False).resample('D').first().fillna(method='ffill').reset_index()
df_wc_ranked = df.merge(rank[["country_full", "total_points", "previous_points", "rank", "rank_change", "rank_date"]], left_on=["date", "home_team"], right_on=["rank_date", "country_full"]).drop(["rank_date", "country_full"], axis=1)
df_wc_ranked = df_wc_ranked.merge(rank[["country_full", "total_points", "previous_points", "rank", "rank_change", "rank_date"]], left_on=["date", "away_team"], right_on=["rank_date", "country_full"], suffixes=("_home", "_away")).drop(["rank_date", "country_full"], axis=1)
df = df_wc_ranked
# print(df[(df_wc_ranked.home_team == "Brazil") | (df.away_team == "Brazil")].tail(10))
def result_finder(home, away):
    if home > away:
        return pd.Series([0, 3, 0])
    if home < away:
        return pd.Series([1, 0, 3])
    else:
        return pd.Series([2, 1, 1])

results = df.apply(lambda x: result_finder(x["home_score"], x["away_score"]), axis=1)
df[["result", "home_team_points", "away_team_points"]] = results
# print(df[(df_wc_ranked.home_team == "Brazil") | (df.away_team == "Brazil")].tail(10))

import seaborn as sns
import matplotlib.pyplot as plt

# plt.figure(figsize=(15, 10))
# sns.heatmap(df[["total_points_home", "rank_home", "total_points_away", "rank_away"]].corr())
# plt.show()

df["rank_dif"] = df["rank_home"] - df["rank_away"]
df["sg"] = df["home_score"] - df["away_score"]
df["points_home_by_rank"] = df["home_team_points"]/df["rank_away"]
df["points_away_by_rank"] = df["away_team_points"]/df["rank_home"]

home_team = df[["date", "home_team", "home_score", "away_score", "rank_home", "rank_away","rank_change_home", "total_points_home", "result", "rank_dif", "points_home_by_rank", "home_team_points"]]

away_team = df[["date", "away_team", "away_score", "home_score", "rank_away", "rank_home","rank_change_away", "total_points_away", "result", "rank_dif", "points_away_by_rank", "away_team_points"]]

home_team.columns = [h.replace("home_", "").replace("_home", "").replace("away_", "suf_").replace("_away", "_suf") for h in home_team.columns]

away_team.columns = [a.replace("away_", "").replace("_away", "").replace("home_", "suf_").replace("_home", "_suf") for a in away_team.columns]

team_stats = home_team.append(away_team)#.sort_values("date")
team_stats_raw = team_stats.copy()

stats_val = []

for index, row in team_stats.iterrows():
    team = row["team"]
    date = row["date"]
    past_games = team_stats.loc[(team_stats["team"] == team) & (team_stats["date"] < date)].sort_values(by=['date'], ascending=False)
    last5 = past_games.head(5)
    
    goals = past_games["score"].mean()
    goals_l5 = last5["score"].mean()
    
    goals_suf = past_games["suf_score"].mean()
    goals_suf_l5 = last5["suf_score"].mean()
    
    rank = past_games["rank_suf"].mean()
    rank_l5 = last5["rank_suf"].mean()
    
    if len(last5) > 0:
        points = past_games["total_points"].values[0] - past_games["total_points"].values[-1]#qtd de pontos ganhos
        points_l5 = last5["total_points"].values[0] - last5["total_points"].values[-1] 
    else:
        points = 0
        points_l5 = 0
        
    gp = past_games["team_points"].mean()
    gp_l5 = last5["team_points"].mean()
    
    gp_rank = past_games["points_by_rank"].mean()
    gp_rank_l5 = last5["points_by_rank"].mean()
    
    stats_val.append([goals, goals_l5, goals_suf, goals_suf_l5, rank, rank_l5, points, points_l5, gp, gp_l5, gp_rank, gp_rank_l5])

stats_cols = ["goals_mean", "goals_mean_l5", "goals_suf_mean", "goals_suf_mean_l5", "rank_mean", "rank_mean_l5", "points_mean", "points_mean_l5", "game_points_mean", "game_points_mean_l5", "game_points_rank_mean", "game_points_rank_mean_l5"]

stats_df = pd.DataFrame(stats_val, columns=stats_cols)

full_df = pd.concat([team_stats.reset_index(drop=True), stats_df], axis=1, ignore_index=False)

home_team_stats = full_df.iloc[:int(full_df.shape[0]/2),:]
away_team_stats = full_df.iloc[int(full_df.shape[0]/2):,:]


home_team_stats = home_team_stats[home_team_stats.columns[-12:]]
away_team_stats = away_team_stats[away_team_stats.columns[-12:]]

home_team_stats.columns = ['home_'+str(col) for col in home_team_stats.columns]
away_team_stats.columns = ['away_'+str(col) for col in away_team_stats.columns]

match_stats = pd.concat([home_team_stats, away_team_stats.reset_index(drop=True)], axis=1, ignore_index=False)

full_df = pd.concat([df, match_stats.reset_index(drop=True)], axis=1, ignore_index=False)

def find_friendly(x):
    if x == "Friendly":
        return 1
    else: return 0

full_df["is_friendly"] = full_df["tournament"].apply(lambda x: find_friendly(x)) 

full_df = pd.get_dummies(full_df, columns=["is_friendly"])

base_df = full_df[["date", "home_team", "away_team", "rank_home", "rank_away","home_score", "away_score","result", "rank_dif", "rank_change_home", "rank_change_away", 'home_goals_mean',
       'home_goals_mean_l5', 'home_goals_suf_mean', 'home_goals_suf_mean_l5',
       'home_rank_mean', 'home_rank_mean_l5', 'home_points_mean',
       'home_points_mean_l5', 'away_goals_mean', 'away_goals_mean_l5',
       'away_goals_suf_mean', 'away_goals_suf_mean_l5', 'away_rank_mean',
       'away_rank_mean_l5', 'away_points_mean', 'away_points_mean_l5','home_game_points_mean', 'home_game_points_mean_l5',
       'home_game_points_rank_mean', 'home_game_points_rank_mean_l5','away_game_points_mean',
       'away_game_points_mean_l5', 'away_game_points_rank_mean',
       'away_game_points_rank_mean_l5',
       'is_friendly_0', 'is_friendly_1']]

base_df_no_fg = base_df.dropna()

df = base_df_no_fg

def no_draw(x):
    if x == 2:
        return 1
    else:
        return x
    
df["target"] = df["result"].apply(lambda x: no_draw(x))

def create_db(df):
    columns = ["home_team", "away_team", "target", "rank_dif", "home_goals_mean", "home_rank_mean", "away_goals_mean", "away_rank_mean", "home_rank_mean_l5", "away_rank_mean_l5", "home_goals_suf_mean", "away_goals_suf_mean", "home_goals_mean_l5", "away_goals_mean_l5", "home_goals_suf_mean_l5", "away_goals_suf_mean_l5", "home_game_points_rank_mean", "home_game_points_rank_mean_l5", "away_game_points_rank_mean", "away_game_points_rank_mean_l5","is_friendly_0", "is_friendly_1"]
    
    base = df.loc[:, columns]
    base.loc[:, "goals_dif"] = base["home_goals_mean"] - base["away_goals_mean"]
    base.loc[:, "goals_dif_l5"] = base["home_goals_mean_l5"] - base["away_goals_mean_l5"]
    base.loc[:, "goals_suf_dif"] = base["home_goals_suf_mean"] - base["away_goals_suf_mean"]
    base.loc[:, "goals_suf_dif_l5"] = base["home_goals_suf_mean_l5"] - base["away_goals_suf_mean_l5"]
    base.loc[:, "goals_per_ranking_dif"] = (base["home_goals_mean"] / base["home_rank_mean"]) - (base["away_goals_mean"] / base["away_rank_mean"])
    base.loc[:, "dif_rank_agst"] = base["home_rank_mean"] - base["away_rank_mean"]
    base.loc[:, "dif_rank_agst_l5"] = base["home_rank_mean_l5"] - base["away_rank_mean_l5"]
    base.loc[:, "dif_points_rank"] = base["home_game_points_rank_mean"] - base["away_game_points_rank_mean"]
    base.loc[:, "dif_points_rank_l5"] = base["home_game_points_rank_mean_l5"] - base["away_game_points_rank_mean_l5"]
    
    model_df = base[["home_team", "away_team", "target", "rank_dif", "goals_dif", "goals_dif_l5", "goals_suf_dif", "goals_suf_dif_l5", "goals_per_ranking_dif", "dif_rank_agst", "dif_rank_agst_l5", "dif_points_rank", "dif_points_rank_l5", "is_friendly_0", "is_friendly_1"]]
    return model_df

model_db = create_db(df)

X = model_db.iloc[:, 3:]
y = model_db[["target"]]


from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state=1)

gb = GradientBoostingClassifier(random_state=5)

params = {"learning_rate": [0.01, 0.1, 0.5],
            "min_samples_split": [5, 10],
            "min_samples_leaf": [3, 5],
            "max_depth":[3,5,10],
            "max_features":["sqrt"],
            "n_estimators":[100, 200]
         } 

gb_cv = GridSearchCV(gb, params, cv = 3, n_jobs = -1, verbose = False)

gb_cv.fit(X_train.values, np.ravel(y_train))

gb = gb_cv.best_estimator_

params_rf = {"max_depth": [20],
                "min_samples_split": [10],
                "max_leaf_nodes": [175],
                "min_samples_leaf": [5],
                "n_estimators": [250],
                 "max_features": ["sqrt"],
                }

rf = RandomForestClassifier(random_state=1)

rf_cv = GridSearchCV(rf, params_rf, cv = 3, n_jobs = -1, verbose = False)

rf_cv.fit(X_train.values, np.ravel(y_train))

rf = rf_cv.best_estimator_

# from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

# def analyze(model):
#     fpr, tpr, _ = roc_curve(y_test, model.predict_proba(X_test.values)[:,1]) #test AUC
#     plt.figure(figsize=(15,10))
#     plt.plot([0, 1], [0, 1], 'k--')
#     plt.plot(fpr, tpr, label="test")

#     fpr_train, tpr_train, _ = roc_curve(y_train, model.predict_proba(X_train.values)[:,1]) #train AUC
#     plt.plot(fpr_train, tpr_train, label="train")
#     auc_test = roc_auc_score(y_test, model.predict_proba(X_test.values)[:,1])
#     auc_train = roc_auc_score(y_train, model.predict_proba(X_train.values)[:,1])
#     plt.legend()
#     plt.title('AUC score is %.2f on test and %.2f on training'%(auc_test, auc_train))
#     plt.show()
    
#     plt.figure(figsize=(15, 10))
#     cm = confusion_matrix(y_test, model.predict(X_test.values))
#     sns.heatmap(cm, annot=True, fmt="d")

# analyze(gb)

with open("country_name.txt", "r", encoding="utf-8") as f:
    info = f.readlines()
    info = list(map(lambda x:x.strip(), info))

English_name = info[:32]
Chinese_name = info[32:]

country_name = {}

for each in zip(English_name, Chinese_name):
    country_name[each[0]] = each[1]


table = {'A': [['Qatar', 0, []],
  ['Ecuador', 0, []],
  ['Senegal', 0, []],
  ['Netherlands', 0, []]],
 'B': [['England', 0, []],
  ['Iran', 0, []],
  ['United States', 0, []],
  ['Wales', 0, []]],
 'C': [['Argentina', 0, []],
  ['Saudi Arabia', 0, []],
  ['Mexico', 0, []],
  ['Poland', 0, []]],
 'D': [['France', 0, []],
  ['Australia', 0, []],
  ['Denmark', 0, []],
  ['Tunisia', 0, []]],
 'E': [['Spain', 0, []],
  ['Costa Rica', 0, []],
  ['Germany', 0, []],
  ['Japan', 0, []]],
 'F': [['Belgium', 0, []],
  ['Canada', 0, []],
  ['Morocco', 0, []],
  ['Croatia', 0, []]],
 'G': [['Brazil', 0, []],
  ['Serbia', 0, []],
  ['Switzerland', 0, []],
  ['Cameroon', 0, []]],
 'H': [['Portugal', 0, []],
  ['Ghana', 0, []],
  ['Uruguay', 0, []],
  ['South Korea', 0, []]]}
groups = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
group_count = 7
matches = [('A', 'Qatar', 'Ecuador'),
 ('A', 'Senegal', 'Netherlands'),
 ('A', 'Qatar', 'Senegal'),
 ('A', 'Netherlands', 'Ecuador'),
 ('A', 'Ecuador', 'Senegal'),
 ('A', 'Netherlands', 'Qatar'),
 ('B', 'England', 'Iran'),
 ('B', 'United States', 'Wales'),
 ('B', 'Wales', 'Iran'),
 ('B', 'England', 'United States'),
 ('B', 'Wales', 'England'),
 ('B', 'Iran', 'United States'),
 ('C', 'Argentina', 'Saudi Arabia'),
 ('C', 'Mexico', 'Poland'),
 ('C', 'Poland', 'Saudi Arabia'),
 ('C', 'Argentina', 'Mexico'),
 ('C', 'Poland', 'Argentina'),
 ('C', 'Saudi Arabia', 'Mexico'),
 ('D', 'Denmark', 'Tunisia'),
 ('D', 'France', 'Australia'),
 ('D', 'Tunisia', 'Australia'),
 ('D', 'France', 'Denmark'),
 ('D', 'Australia', 'Denmark'),
 ('D', 'Tunisia', 'France'),
 ('E', 'Germany', 'Japan'),
 ('E', 'Spain', 'Costa Rica'),
 ('E', 'Japan', 'Costa Rica'),
 ('E', 'Spain', 'Germany'),
 ('E', 'Japan', 'Spain'),
 ('E', 'Costa Rica', 'Germany'),
 ('F', 'Morocco', 'Croatia'),
 ('F', 'Belgium', 'Canada'),
 ('F', 'Belgium', 'Morocco'),
 ('F', 'Croatia', 'Canada'),
 ('F', 'Croatia', 'Belgium'),
 ('F', 'Canada', 'Morocco'),
 ('G', 'Switzerland', 'Cameroon'),
 ('G', 'Brazil', 'Serbia'),
 ('G', 'Cameroon', 'Serbia'),
 ('G', 'Brazil', 'Switzerland'),
 ('G', 'Serbia', 'Switzerland'),
 ('G', 'Cameroon', 'Brazil'),
 ('H', 'Uruguay', 'South Korea'),
 ('H', 'Portugal', 'Ghana'),
 ('H', 'South Korea', 'Ghana'),
 ('H', 'Portugal', 'Uruguay'),
 ('H', 'Ghana', 'Uruguay'),
 ('H', 'South Korea', 'Portugal')]

def find_stats(team_1):
#team_1 = "Qatar"
    past_games = team_stats_raw[(team_stats_raw["team"] == team_1)].sort_values("date")
    last5 = team_stats_raw[(team_stats_raw["team"] == team_1)].sort_values("date").tail(5)

    team_1_rank = past_games["rank"].values[-1]
    team_1_goals = past_games.score.mean()
    team_1_goals_l5 = last5.score.mean()
    team_1_goals_suf = past_games.suf_score.mean()
    team_1_goals_suf_l5 = last5.suf_score.mean()
    team_1_rank_suf = past_games.rank_suf.mean()
    team_1_rank_suf_l5 = last5.rank_suf.mean()
    team_1_gp_rank = past_games.points_by_rank.mean()
    team_1_gp_rank_l5 = last5.points_by_rank.mean()

    return [team_1_rank, team_1_goals, team_1_goals_l5, team_1_goals_suf, team_1_goals_suf_l5, team_1_rank_suf, team_1_rank_suf_l5, team_1_gp_rank, team_1_gp_rank_l5]

def find_features(team_1, team_2):
    rank_dif = team_1[0] - team_2[0]
    goals_dif = team_1[1] - team_2[1]
    goals_dif_l5 = team_1[2] - team_2[2]
    goals_suf_dif = team_1[3] - team_2[3]
    goals_suf_dif_l5 = team_1[4] - team_2[4]
    goals_per_ranking_dif = (team_1[1]/team_1[5]) - (team_2[1]/team_2[5])
    dif_rank_agst = team_1[5] - team_2[5]
    dif_rank_agst_l5 = team_1[6] - team_2[6]
    dif_gp_rank = team_1[7] - team_2[7]
    dif_gp_rank_l5 = team_1[8] - team_2[8]
    
    return [rank_dif, goals_dif, goals_dif_l5, goals_suf_dif, goals_suf_dif_l5, goals_per_ranking_dif, dif_rank_agst, dif_rank_agst_l5, dif_gp_rank, dif_gp_rank_l5, 1, 0]

advanced_group = []
last_group = ""

for k in table.keys():
    for t in table[k]:
        t[1] = 0
        t[2] = []
        
for idx, teams in enumerate(matches):
    draw = False
    team_1 = find_stats(teams[1])
    team_2 = find_stats(teams[2])

    features_g1 = find_features(team_1, team_2)
    features_g2 = find_features(team_2, team_1)

    probs_g1 = gb.predict_proba([features_g1])
    probs_g2 = gb.predict_proba([features_g2])
    
    team_1_prob_g1 = probs_g1[0][0]
    team_1_prob_g2 = probs_g2[0][1]
    team_2_prob_g1 = probs_g1[0][1]
    team_2_prob_g2 = probs_g2[0][0]

    team_1_prob = (probs_g1[0][0] + probs_g2[0][1])/2
    team_2_prob = (probs_g2[0][0] + probs_g1[0][1])/2
    
    if ((team_1_prob_g1 > team_2_prob_g1) & (team_2_prob_g2 > team_1_prob_g2)) | ((team_1_prob_g1 < team_2_prob_g1) & (team_2_prob_g2 < team_1_prob_g2)):
        draw=True
        for i in table[teams[0]]:
            if i[0] == teams[1] or i[0] == teams[2]:
                i[1] += 1
                
    elif team_1_prob > team_2_prob:
        winner = teams[1]
        winner_proba = team_1_prob
        for i in table[teams[0]]:
            if i[0] == teams[1]:
                i[1] += 3
                
    elif team_2_prob > team_1_prob:  
        winner = teams[2]
        winner_proba = team_2_prob
        for i in table[teams[0]]:
            if i[0] == teams[2]:
                i[1] += 3
    
    for i in table[teams[0]]: #adding criterio de desempate (probs por jogo)
            if i[0] == teams[1]:
                i[2].append(team_1_prob)
            if i[0] == teams[2]:
                i[2].append(team_2_prob)

    if last_group != teams[0]:
        if last_group != "":
            print("\n")
            print("小组 %s 排名: "%(last_group))
            
            for i in table[last_group]: #adding crieterio de desempate
                i[2] = np.mean(i[2])
            
            final_points = table[last_group]
            final_table = sorted(final_points, key=itemgetter(1, 2), reverse = True)
            advanced_group.append([final_table[0][0], final_table[1][0]])
            for i in final_table:
                print("%s -------- %d"%(country_name[i[0]] + "(" + i[0] + ")", i[1]))
        print("\n")
        print("-"*10+" 开始分析小组 %s "%(teams[0])+"-"*10)
        
        
    if draw == False:
        print("小组 %s - %s vs. %s: %s 获胜，胜率： %.2f"%(teams[0], country_name[teams[1]] + "(" + teams[1] + ")", country_name[teams[2]] + "(" + teams[2] + ")", country_name[winner] + "(" + winner + ")", winner_proba))
        save_res(teams[1], teams[2], winner, winner_proba, "{}-{}.png".format(teams[0], idx))
    else:
        print("小组 %s - %s vs. %s: Draw"%(teams[0], teams[1], teams[2]))
        save_res_draw(teams[1], teams[2], teams[2], 0.5, "{}-{}.png".format(teams[0], idx))
    last_group =  teams[0]

print("\n")
print("小组 %s 排名: "%(last_group))

for i in table[last_group]: #adding crieterio de desempate
    i[2] = np.mean(i[2])
            
final_points = table[last_group]
final_table = sorted(final_points, key=itemgetter(1, 2), reverse = True)
advanced_group.append([final_table[0][0], final_table[1][0]])
for i in final_table:
    print("%s -------- %d"%(country_name[i[0]] + "(" + i[0] + ")", i[1]))


advanced = advanced_group

playoffs = {"16 强": [], "四分之一决赛": [], "半决赛": [], "决赛": []}

for p in playoffs.keys():
    playoffs[p] = []

actual_round = ""
next_rounds = []

for p in playoffs.keys():
    if p == "16 强":
        control = []
        for a in range(0, len(advanced*2), 1):
            if a < len(advanced):
                if a % 2 == 0:
                    control.append((advanced*2)[a][0])
                else:
                    control.append((advanced*2)[a][1])
            else:
                if a % 2 == 0:
                    control.append((advanced*2)[a][1])
                else:
                    control.append((advanced*2)[a][0])

        playoffs[p] = [[control[c], control[c+1]] for c in range(0, len(control)-1, 1) if c%2 == 0]
        
        for i in range(0, len(playoffs[p]), 1):
            game = playoffs[p][i]
            
            home = game[0]
            away = game[1]
            team_1 = find_stats(home)
            team_2 = find_stats(away)

            features_g1 = find_features(team_1, team_2)
            features_g2 = find_features(team_2, team_1)
            
            probs_g1 = gb.predict_proba([features_g1])
            probs_g2 = gb.predict_proba([features_g2])
            
            team_1_prob = (probs_g1[0][0] + probs_g2[0][1])/2
            team_2_prob = (probs_g2[0][0] + probs_g1[0][1])/2
            
            if actual_round != p:
                print("-"*10)
                print("开始模拟 %s"%(p))
                print("-"*10)
                print("\n")
            
            if team_1_prob < team_2_prob:
                print("%s vs. %s: %s 晋级，概率： %.2f"%(country_name[home] + "(" + home + ")", country_name[away] + "(" + away + ")", country_name[away] + "(" + away + ")", team_2_prob))

                save_res(home, away, away, team_2_prob, "%s.png" % str(time.time()).replace(".", "_"))
                
                next_rounds.append(away)
            else:
                print("%s vs. %s: %s 晋级，概率： %.2f"%(country_name[home] + "(" + home + ")", country_name[away] + "(" + away + ")", country_name[home] + "(" + home + ")", team_1_prob))
                save_res(home, away, home, team_1_prob, "%s.png" % str(time.time()).replace(".", "_"))
                next_rounds.append(home)
            
            game.append([team_1_prob, team_2_prob])
            playoffs[p][i] = game
            actual_round = p
        
    else:
        playoffs[p] = [[next_rounds[c], next_rounds[c+1]] for c in range(0, len(next_rounds)-1, 1) if c%2 == 0]
        next_rounds = []
        for i in range(0, len(playoffs[p])):
            game = playoffs[p][i]
            home = game[0]
            away = game[1]
            team_1 = find_stats(home)
            team_2 = find_stats(away)
            
            features_g1 = find_features(team_1, team_2)
            features_g2 = find_features(team_2, team_1)
            
            probs_g1 = gb.predict_proba([features_g1])
            probs_g2 = gb.predict_proba([features_g2])
            
            team_1_prob = (probs_g1[0][0] + probs_g2[0][1])/2
            team_2_prob = (probs_g2[0][0] + probs_g1[0][1])/2
            
            if actual_round != p:
                print("-"*10)
                print("开始模拟 %s"%(p))
                print("-"*10)
                print("\n")
            
            if team_1_prob < team_2_prob:
                print("%s vs. %s: %s 晋级，概率： %.2f"%(country_name[home] + "(" + home + ")", country_name[away] + "(" + away + ")", country_name[away] + "(" + away + ")", team_2_prob))

                save_res(home, away, away, team_2_prob, "%s.png" % str(time.time()).replace(".", "_"))

                next_rounds.append(away)
            else:
                print("%s vs. %s: %s 晋级，概率： %.2f"%(country_name[home] + "(" + home + ")", country_name[away] + "(" + away + ")", country_name[home] + "(" + home + ")", team_1_prob))
                save_res(home, away, home, team_1_prob, "%s.png" % str(time.time()).replace(".", "_"))
                next_rounds.append(home)
            game.append([team_1_prob, team_2_prob])
            playoffs[p][i] = game
            actual_round = p

四、成品链接

Predicting-FIFA-2022-World-Cup-main.zip (1.95 MB, 下载次数: 358)

蓝奏云下载链接：
https://wwm.lanzout.com/iyqmF0gw8uif
密码:52pj

uouobb · 发表于 2022-11-26 00:23

C:\Python\Python310\python.exe C:/Users/Administrator/Desktop/Predicting-FIFA-2022-World-Cup-main/predict.py
C:\Users\Administrator\Desktop\Predicting-FIFA-2022-World-Cup-main\predict.py:56: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
team_stats = home_team.append(away_team)#.sort_values("date")
C:\Users\Administrator\Desktop\Predicting-FIFA-2022-World-Cup-main\predict.py:141: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df["target"] = df["result"].apply(lambda x: no_draw(x))

一堆好家伙。。。感觉你也是不知哪搞来的

uouobb · 发表于 2022-11-26 00:12

Traceback (most recent call last):
File "C:\Users\Administrator\Desktop\Predicting-FIFA-2022-World-Cup-main\predict.py", line 7, in <module>
from save_res import save_res, save_res_draw
File "C:\Users\Administrator\Desktop\Predicting-FIFA-2022-World-Cup-main\save_res.py", line 4, in <module>
import cv2
ModuleNotFoundError: No module named 'cv2'

没有cv2

smileat2000 · 发表于 2022-11-25 22:04

我只想知道结果

Clown4730 · 发表于 2022-11-25 22:05

拿走了。

deguoqieguo666 · 发表于 2022-11-25 22:05

拿走了，谢谢分享。

Aerberter · 发表于 2022-11-25 22:09

可靠不可靠，看看

charleschai · 发表于 2022-11-25 22:11

巴西！巴西！哈哈！

Chaos666 · 发表于 2022-11-25 22:17

哈哈会玩

sam喵喵 · 发表于 2022-11-25 22:18

C罗点球进去以后，葡萄牙整体移速降低30%，球员全属性降低了30%，越看越假了

sam喵喵 · 发表于 2022-11-25 22:19

莱奥进球之后也开始梦游

gcode · 发表于 2022-11-25 22:22

都没时间好好看看，真是遗憾了

帐号		自动登录	找回密码
密码			注册[Register]

[Python 转载] 【Python】模拟预测2022卡塔尔世界杯结果

免费评分