本帖最后由 cnbrave 于 2020-1-9 11:25 编辑
朋友叫我爬取最近一期的双色球,刚好现学现用,就试着做出来了,请大家指点。
[Python] 纯文本查看 复制代码
import requests
from lxml import etree
'''
type=1 双色球,type=2 超级大乐透。
url_num为空时,最新一期信息。
url_num也可以输入具体第几期。
'''
def main(type=1, url_num=''):
    """Fetch one lottery draw from kaijiang.500.com and print it to stdout.

    Args:
        type: Lottery selector. ``1`` selects 双色球 (Double Color Ball);
            any other value selects 超级大乐透 (Super Lotto). The name
            shadows the ``type`` builtin but is kept so that existing
            keyword callers keep working.
        url_num: Issue number of the draw to fetch. An empty string (the
            default) requests the site's latest-draw page. Non-string
            values (e.g. an ``int``) are converted with ``str``.

    Returns:
        None. The issue number, the winning numbers and the draw order are
        printed. Any error raised while fetching or parsing is caught and
        reported on stdout instead of propagating.
    """
    ua_headers = {"User-Agent": 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)'}
    base_url = 'https://kaijiang.500.com/'

    if type == 1:
        type_title = '双色球 第 '
        slug = 'ssq'
    else:
        type_title = '超级大乐透 第 '
        slug = 'dlt'

    # Accept ints as well as strings for the issue number.
    issue = str(url_num)
    if issue == '':
        # All four URLs now use https (the original used plain http for
        # the latest-draw page of type 2 only).
        url = base_url + slug + '.shtml'
    else:
        url = base_url + 'shtml/' + slug + '/' + issue + '.shtml'

    try:
        # A timeout keeps the script from hanging forever on a dead connection.
        response = requests.get(url=url, headers=ua_headers, timeout=10)
        # Report HTTP errors (e.g. unknown issue number) as such rather than
        # as an opaque parsing failure further down.
        response.raise_for_status()
        tree = etree.HTML(response.text)

        # [0] raises IndexError when the page layout does not match; that is
        # reported by the handler below.
        issue_label = tree.xpath("//font[@class='cfont2']/strong/text()")[0]
        td_title01 = "\r\n" + type_title + str(issue_label) + " 期:"
        num_lst = tree.xpath("//div[@class='ball_box01']/ul/li/text()")
        # normalize-space() makes XPath return a single string, not a list.
        draw_order = tree.xpath('normalize-space(//table[@class="kj_tablelist02"][1]/tr[2]/td[1]/table[1]/tr[2]/td[2]/text())')
    except Exception as exc:
        # Top-level boundary of the script: report and give up on this draw.
        # The original concatenated the Exception *class* to a str, which
        # itself raised TypeError and hid the real error.
        print("程序错误,请检查!\r\n" + str(exc))
        return

    print(td_title01)
    print("开奖号码:", end='')
    for num in num_lst:
        print(num, end=' ')
    print("\r\n出球顺序:", end='')
    print(draw_order)
    print('\r\n**************************************************************')
if __name__ == '__main__':
    # Print the latest draw of each supported lottery, in order:
    # 1 = Double Color Ball (双色球), 2 = Super Lotto (超级大乐透).
    for lottery_type in (1, 2):
        main(lottery_type)
|