import requests
import pandas as pd
from datetime import datetime, timedelta
def fetch_match_data(date, url, headers, timeout=10):
    """Fetch the first page of match records for a single day.

    Sends a GET request to ``url`` with ``matchBeginDate`` and ``matchEndDate``
    both set to ``date``, asking for page 1 with a page size of 100. No further
    pages are requested.

    Args:
        date: Day to query; passed through unchanged as begin and end date.
        url: Endpoint to query.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network failure, timeout, or a body that is not valid JSON).
    """
    params = {
        "matchPage": "1",
        "matchBeginDate": date,
        "matchEndDate": date,
        "leagueId": "",
        "pageSize": "100",
        "pageNo": "1",
        "isFix": "0",
        "pcOrWap": "1",
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"请求失败: {exc}")
        return None
def parse_match_data(response_json):
    """Extract per-match columns from a match-list API response.

    A match is kept only when it carries every expected field and its
    ``goalLine`` is not the empty string. Each kept match contributes exactly
    one value to every column, so all returned lists have the same length and
    the same row order as ``matchResult``.

    Args:
        response_json: Decoded JSON body of the match-list response.

    Returns:
        A dict with the keys ``Time1``, ``mathstr``, ``TimeId``,
        ``LeagueName``, ``Home``, ``Away`` and ``rang``, each mapping to a
        list. Returns ``None`` (after printing the error message) when
        ``errorCode`` is not ``"0"`` or ``resultCount`` is not positive.
    """
    payload = response_json.get("value") or {}
    succeeded = response_json.get("errorCode") == "0"
    if not succeeded or not (payload.get("resultCount") or 0) > 0:
        print(f"没有比赛或数据错误: {response_json.get('errorMessage', '未知错误')}")
        return None

    # (output column, key in the API record)
    field_map = (
        ("Time1", "matchDate"),
        ("mathstr", "matchNumStr"),
        ("TimeId", "matchId"),
        ("LeagueName", "leagueNameAbbr"),
        ("Home", "homeTeam"),
        ("Away", "awayTeam"),
        ("rang", "goalLine"),
    )
    columns = {column: [] for column, _ in field_map}

    for match in payload.get("matchResult") or []:
        # Building every column from the same record keeps rows aligned;
        # filtering each column separately would shift later rows whenever a
        # single record lacks a key.
        if any(api_key not in match for _, api_key in field_map):
            continue
        if match["goalLine"] == "":
            continue
        for column, api_key in field_map:
            columns[column].append(match[api_key])

    return columns
def fetch_support_rates(match_ids, url2, headers, timeout=10):
    """Fetch support-rate data for a batch of matches.

    Sends a GET request to ``url2`` with the ids joined by commas in
    ``matchIds``, ``poolCode`` set to ``"hhad,had"`` and ``sportType`` set to
    ``"1"``.

    Args:
        match_ids: Iterable of match ids; each is converted with ``str``.
        url2: Endpoint to query.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network failure, timeout, or a body that is not valid JSON).
    """
    params2 = {
        "matchIds": ",".join(map(str, match_ids)),
        "poolCode": "hhad,had",
        "sportType": "1",
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url2, headers=headers, params=params2, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"请求失败: {exc}")
        return None
def fetch_result_rates(data, url3, headers, timeout=10):
    """Fetch the first page of result records for a single day.

    Sends a GET request to ``url3`` with ``matchBeginDate`` and
    ``matchEndDate`` both set to ``data``, asking for page 1 with a page size
    of 100. The query parameters are the same ones ``fetch_match_data`` sends.

    Args:
        data: Day to query; passed through unchanged as begin and end date.
        url3: Endpoint to query.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network failure, timeout, or a body that is not valid JSON).
    """
    params = {
        "matchPage": "1",
        "matchBeginDate": data,
        "matchEndDate": data,
        "leagueId": "",
        "pageSize": "100",
        "pageNo": "1",
        "isFix": "0",
        "pcOrWap": "1",
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url3, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"请求失败: {exc}")
        return None
def combine_data(parsed_match_data, support_data):
    """Build a DataFrame of match columns plus support-rate columns.

    The match columns are copied from ``parsed_match_data``; note that the
    output column named ``"TimeId"`` is filled from the ``"mathstr"`` list.
    One value per support-rate column is appended for every entry of
    ``support_data["value"]``, in that mapping's iteration order; a missing
    pool or rate becomes ``""``. Columns shorter than the longest one are
    padded with ``None`` so the frame can be constructed.

    NOTE(review): rows and support rates are paired purely by position, which
    assumes the support entries arrive in the same order as the match rows.
    Confirm against the API before relying on the pairing.

    Args:
        parsed_match_data: Dict of equal-length lists as returned by
            ``parse_match_data``. It is not modified.
        support_data: Decoded support-rate response; its ``"value"`` entry is
            expected to map to per-match dicts holding ``"HAD"`` and
            ``"HHAD"`` sub-dicts.

    Returns:
        A ``pandas.DataFrame`` with six match columns followed by six
        support-rate columns.
    """
    # (output column, pool key, rate key inside the pool)
    rate_columns = (
        ("胜支持率", "HAD", "hSupportRate"),
        ("平支持率", "HAD", "dSupportRate"),
        ("负支持率", "HAD", "aSupportRate"),
        ("让胜支持率", "HHAD", "hSupportRate"),
        ("让平支持率", "HHAD", "dSupportRate"),
        ("让负支持率", "HHAD", "aSupportRate"),
    )

    # Copy the input lists: padding below must not leak into the caller's data.
    extracted_data = {
        "时间": list(parsed_match_data["Time1"]),
        "TimeId": list(parsed_match_data["mathstr"]),
        "联赛": list(parsed_match_data["LeagueName"]),
        "主队": list(parsed_match_data["Home"]),
        "客队": list(parsed_match_data["Away"]),
        "让球": list(parsed_match_data["rang"]),
    }
    for column, _, _ in rate_columns:
        extracted_data[column] = []

    # A null "value", entry or pool is treated as "no data" instead of crashing.
    for match_details in (support_data.get("value") or {}).values():
        pools = match_details or {}
        for column, pool_key, rate_key in rate_columns:
            pool = pools.get(pool_key) or {}
            extracted_data[column].append(pool.get(rate_key, ""))

    # DataFrame construction requires equal-length columns.
    max_len = max(len(values) for values in extracted_data.values())
    for values in extracted_data.values():
        values.extend([None] * (max_len - len(values)))

    return pd.DataFrame(extracted_data)
def save_to_excel(dataframe, file_path):
    """Write ``dataframe`` to an Excel file unless it is empty.

    Args:
        dataframe: Frame to save; written without its index.
        file_path: Destination path handed to ``DataFrame.to_excel``.

    Prints a confirmation containing the path after writing, or a notice
    that there is nothing to save when the frame is empty.
    """
    if dataframe.empty:
        print("没有数据可保存。")
        return

    dataframe.to_excel(file_path, index=False)
    print(f"数据已保存在 {file_path}")
def fetch_football_data(start_date, end_date, url, url2, headers,
                        excel_path="最新支持率.xlsx", url3=None):
    """Collect match and support-rate data per day and save it to Excel.

    For every day from ``start_date`` to ``end_date`` (inclusive) the match
    list is fetched and parsed, support rates are requested for the parsed
    match ids, and the day's rows are assembled with the match columns sorted
    by numeric ``TimeId``. All days are concatenated and handed to
    ``save_to_excel``. Days with no usable match or support data are skipped.

    NOTE(review): the support-rate columns keep the order produced by
    ``combine_data`` while the match columns are re-sorted by ``TimeId``; this
    presumably relies on the support API returning entries in ascending id
    order. Confirm against the API.

    Args:
        start_date: First day to fetch (``datetime``).
        end_date: Last day to fetch (``datetime``), inclusive.
        url: Match-list endpoint.
        url2: Support-rate endpoint.
        headers: HTTP headers sent with every request.
        excel_path: Destination file for the combined data.
        url3: Result endpoint. When omitted, a module-level ``url3`` is used
            if one exists, otherwise ``url``.
    """
    if url3 is None:
        # Earlier versions read ``url3`` as a module global, which raised
        # NameError whenever the script's __main__ block had not defined it.
        url3 = globals().get("url3") or url

    match_columns = ("Time1", "mathstr", "TimeId", "LeagueName", "Home", "Away")
    plain_rate_columns = ("胜支持率", "平支持率", "负支持率")
    handicap_rate_columns = ("让胜支持率", "让平支持率", "让负支持率")

    daily_frames = []
    day_count = (end_date - start_date).days + 1
    for offset in range(day_count):
        date_str = (start_date + timedelta(offset)).strftime('%Y-%m-%d')
        print(f"正在爬取 {date_str} 的数据...")

        match_data = fetch_match_data(date_str, url, headers)
        if not match_data:
            continue
        parsed_match_data = parse_match_data(match_data)
        if not parsed_match_data:
            continue

        match_ids = parsed_match_data["TimeId"]
        print(match_ids)
        support_data = fetch_support_rates(match_ids, url2, headers)
        result_data = fetch_result_rates(date_str, url3, headers)
        if not support_data:
            continue

        combined_df = combine_data(parsed_match_data, support_data)

        # Row order that sorts the day's matches by numeric TimeId.
        order = sorted(range(len(match_ids)), key=lambda k: int(match_ids[k]))

        # Insertion order below defines the column order of the output sheet.
        day_columns = {
            name: [parsed_match_data[name][i] for i in order]
            for name in match_columns
        }
        for name in plain_rate_columns:
            day_columns[name] = combined_df[name]
        day_columns["rang"] = [parsed_match_data["rang"][i] for i in order]
        for name in handicap_rate_columns:
            day_columns[name] = combined_df[name]
        sorted_data = pd.DataFrame(day_columns)

        if result_data:
            print(result_data)

        daily_frames.append(sorted_data)

    # Concatenate once at the end instead of re-copying the data every day.
    if daily_frames:
        all_data = pd.concat(daily_frames, ignore_index=True)
    else:
        all_data = pd.DataFrame()
    save_to_excel(all_data, excel_path)
# Example usage:
if __name__ == '__main__':
    import base64

    def _decode_url(token):
        """Return the text obtained by base64-decoding ``token``."""
        return base64.b64decode(token).decode("utf-8")

    # No request headers are configured here; add any the API requires.
    headers = {
    }
    start_date = datetime(2024, 11, 28)
    end_date = datetime(2024, 11, 29)
    # The endpoints are stored base64-encoded and must be quoted: as bare
    # tokens they are not valid Python.
    url = _decode_url("aHR0cHM6Ly93ZWJhcGkuc3BvcnR0ZXJ5LmNuL2dhdGV3YXkvamMvZm9vdGJhbGwvZ2V0TWF0Y2hSZXN1bHRWMS5xcnk=")
    url2 = _decode_url("aHR0cHM6Ly93ZWJhcGkuc3BvcnR0ZXJ5LmNuL2dhdGV3YXkvamMvY29tbW9uL2dldFN1cHBvcnRSYXRlVjEucXJ5")
    # Read by fetch_football_data as a module-level name; keep it defined here.
    url3 = _decode_url("aHR0cHM6Ly93ZWJhcGkuc3BvcnR0ZXJ5LmNuL2dhdGV3YXkvamMvZm9vdGJhbGwvZ2V0TWF0Y2hSZXN1bHRWMS5xcnk=")
    fetch_football_data(start_date, end_date, url, url2, headers)