好友
阅读权限10
听众
最后登录1970-1-1
|
违规麻烦联系删除 - -
目前只爬出了基金代码，其他字段都没有爬取；EXCEL 的输出还没有处理好，单列数据不会处理。
import pandas as pd,re
import requests
import random
#http://fund.eastmoney.com/HH_jzzzl.html
def main():
    """Download pages 1-22 of the fund list and save the fund codes to Excel.

    Each page is fetched with ``getdata`` and parsed with ``jiexidata``; the
    collected codes are written as a single column named ``基金代码`` to
    ``.\\123.xls`` via ``save``.
    """
    fund_codes = []
    for i in range(1, 23):
        baseurl = (
            "http://fund.eastmoney.com/Data/Fund_JJJZ_Data.aspx"
            "?t=1&lx=3&letter=&gsid=&text=&sort=zdf,desc&page="
            f"{i},200&dt=1603365267841&atfc=&onlySale=0"
        )
        html = getdata(baseurl)
        if html is None:
            # getdata returns None on a request error or a non-200 status;
            # skip the page instead of crashing inside the parser.
            print(f'第{i}页 获取失败，已跳过')
            continue
        # The original called the undefined name ``jiexIDAta`` here.
        fund_codes.extend(jiexidata(html))
        print(f'第{i}页')
    datalist = {'基金代码': fund_codes}
    # NOTE(review): writing ".xls" depends on the installed pandas / Excel
    # engine supporting that format - confirm in the target environment.
    save(datalist, ".\\123.xls")
def getdata(baseurl):
    """Fetch *baseurl* with a randomly chosen User-Agent header.

    On an HTTP 200 response the body text is also written to ``123.txt``
    (UTF-8, overwriting any previous content) and then returned.

    Returns:
        The response text on HTTP 200; ``None`` on any other status code or
        when the request raises ``requests.RequestException``.
    """
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
    ]
    headers = {
        "User-Agent": random.choice(user_agent_list)
    }
    try:
        response = requests.get(baseurl, headers=headers, timeout=1)
    except requests.RequestException:
        # The original caught the undefined name ``RequestsException``, which
        # turned every network error into a NameError.
        return None
    if response.status_code != 200:
        return None
    # Keep a copy of the latest page on disk; duquwenjian() reads this file.
    with open("123.txt", "w+", encoding="utf-8") as f:
        f.write(response.text)
    return response.text
def jiexidata(html):
    """Extract the first field of every row in the ``datas:[...]`` payload.

    The text is expected to contain ``datas:[[...],[...]],count``; each inner
    bracketed row is a comma-separated list of quoted values, and the first
    value of each row is collected (the caller stores these as fund codes).

    Args:
        html: Response text to parse. ``None`` or an empty string is accepted.

    Returns:
        A list of first-column values with surrounding quotes removed. The
        list is empty when *html* is empty or contains no ``datas`` payload.
    """
    if not html:
        return []
    payload = re.search(r"datas:\[(.*)\],count", html, re.S)
    if payload is None:
        return []
    codes = []
    # Each row looks like ["code","name",...]; only the first field is kept.
    for row in re.findall(r"\[([^\]]*)\]", payload.group(1)):
        code = row.split(",", 1)[0].strip().strip('"')
        if code:
            codes.append(code)
    return codes
def duquwenjian():
    """Return the full contents of ``123.txt`` decoded as UTF-8."""
    with open("123.txt", "r", encoding="utf-8") as cached_page:
        # The context manager closes the file when the block exits.
        return cached_page.read()
def save(a, savepath):
    """Build a pandas DataFrame from *a* and write it to *savepath* as Excel.

    Args:
        a: Data accepted by ``pd.DataFrame`` (the caller passes a dict that
            maps a column name to a list of values).
        savepath: Destination path handed to ``DataFrame.to_excel``.
    """
    pd.DataFrame(a).to_excel(savepath)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
-
免费评分
-
查看全部评分
|
发帖前要善用【论坛搜索】功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。 |
|
|
|
|