# 本帖最后由 PAOLO 于 2019-3-18 21:49 编辑
# 爬酷狗音乐TOP500歌曲信息!
# 导入需要的模块
import requests
from bs4 import BeautifulSoup
import time
# Request headers sent with every page fetch so the scraper presents itself
# as a regular desktop browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.25 Safari/537.36 '
        'Core/1.70.3641.400 QQBrowser/10.4.3284.400'
    ),
}
#获取网页信息
def _split_title(text):
    """Split an 'artist - song' title into an (artist, song) pair.

    Only the first '-' is treated as the separator, so a song name that
    itself contains a hyphen is kept whole. If the text has no '-', the
    artist is returned as an empty string and the whole text as the song.
    Both parts are stripped of surrounding whitespace.
    """
    artist, separator, song = text.partition('-')
    if not separator:
        return '', text.strip()
    return artist.strip(), song.strip()


def get_info(url):
    """Fetch one ranking page and print one dict per listed song.

    The page at ``url`` is downloaded with the module-level ``headers``,
    parsed with BeautifulSoup, and the rank, title and duration elements are
    paired up positionally. Each printed dict has the keys '排名' (rank),
    '歌手' (artist), '歌曲' (song) and '时间' (duration).

    Args:
        url: Address of the ranking page to scrape.

    Returns:
        None. Results are written to standard output.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps a stalled connection from blocking the crawl forever.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    durations = soup.select('span.pc_temp_tips_r > span')
    # zip() stops at the shortest list, so an incomplete row is dropped
    # rather than mis-paired. The loop variable is deliberately not called
    # `time`, which would shadow the imported module inside this function.
    for rank, title, duration in zip(ranks, titles, durations):
        artist, song = _split_title(title.get_text())
        data = {
            '排名': rank.get_text().strip(),
            '歌手': artist,
            '歌曲': song,
            '时间': duration.get_text().strip(),
        }
        print(data)
#
if __name__ == '__main__':
    # Script entry point: build the URLs for ranking pages 1 through 23,
    # then scrape them one after another.
    page_template = 'http://www.kugou.com/yy/rank/home/{}-8888.html'
    urls = list(map(page_template.format, range(1, 24)))
    for url in urls:
        get_info(url)
        # Wait one second between requests to avoid hammering the server.
        time.sleep(1)
|