import requests
from lxml import etree
# HTTP headers sent with the page request; the User-Agent mimics a desktop
# Firefox browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0",
}

# Address of the exchange-rate page that gets downloaded.
URL = 'https://www.boc.cn/sourcedb/whpj/index.html'

# Column titles of the output file, in order: currency name, spot buying
# rate, cash buying rate, spot selling rate, cash selling rate, bank
# conversion rate, publication date, publication time.
catalog = [
    '货币名称', '现汇买入价', '现钞买入价', '现汇卖出价',
    '现钞卖出价', '中行折算价', '发布日期', '发布时间',
]
result = '\t'.join(catalog)

# (Re)create the output file containing only the header row.  The trailing
# newline matters: the data rows are appended to this file later, and
# without it the first data row would be glued onto the header line.
# NOTE: the encoding is left at the platform default on purpose so that it
# matches the later append-mode write to the same file.
with open("./今日国际货币汇率.txt", 'w') as fp:
    fp.write(result + '\n')
# Fetch and analyse the data
def mysj(URL):
    """Download the page at *URL* and parse it with lxml's HTML parser.

    Args:
        URL: Address of the page to fetch.

    Returns:
        The parsed tree returned by ``etree.HTML`` for the response body.

    Raises:
        requests.RequestException: If the request fails, exceeds the
            3-second timeout, or the server answers with a 4xx/5xx status.
    """
    response = requests.get(url=URL, headers=headers, timeout=3)
    # Fail loudly on an HTTP error status instead of silently parsing an
    # error page, which would yield no table cells further down the line.
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response declares.
    response.encoding = 'UTF-8'
    return etree.HTML(response.text)
def handle(res):
    """Return the text nodes of all table cells found in the parsed page.

    Args:
        res: Parsed document exposing an ``xpath`` method.

    Returns:
        Whatever ``res.xpath`` yields for the cell-text query; cells without
        any text node contribute nothing to the result.
    """
    cell_text_query = '//div/div/table/tr/td/text()'
    return res.xpath(cell_text_query)
def _is_currency_name(text):
    """Return True when *text* begins with a CJK ideograph (U+4E00-U+9FFF).

    Only the first character is inspected; an empty string is never a match.
    """
    return bool(text) and '\u4e00' <= text[0] <= '\u9fff'


# Download the page and flatten every table cell's text into one list.
res = mysj(URL)
data = handle(res)
print(data)

# A cell starting with a Chinese character is treated as a currency name and
# therefore as the first cell of a new row.  Rows cannot simply be cut every
# N cells because cells without text are absent from the flat list.
num = [index for index, cell in enumerate(data) if _is_currency_name(cell)]
print(num)
# Sentinel so that the last row extends to the end of the list.
num.append(len(data))

# Slice between consecutive row starts.  Pairing each start with the next one
# stays correct even when the list does not begin with a currency name; any
# cells ahead of the first currency name are dropped.
s = [data[start:end] for start, end in zip(num, num[1:])]
print(s)

# Tab-separate the cells within a row and put one row per line.
data = '\n'.join('\t'.join(row) for row in s)
print(data)

# Append the rows to the output file that already holds the header row.
with open("./今日国际货币汇率.txt", 'a') as fp:
    fp.write(data)
# Unfinished second pass: select the <td> elements themselves rather than
# their text nodes (as handle() does), so that cells without text stay
# visible instead of vanishing from the result.
# NOTE(review): this rebinds the module-level name `catalog`, which earlier
# held the header titles.
catalog = res.xpath('//div/div/table/tr/td')
for cell in catalog:
    # Relative path: 'text()' looks at this cell's own text nodes.  A leading
    # '/' would make the expression absolute and evaluate it from the
    # document root instead of the cell.
    if cell.xpath('text()'):
        ...  # TODO: handle a cell that contains text
    else:
        ...  # TODO: handle an empty cell - presumably emit a placeholder; confirm intent