# 第四天了,爬个音乐,嘿嘿。不多说,上源码。然后我要去学 Scrapy 框架了,等我学好了,回来继续给大家分享。
import requests
import re
from lxml import etree
import os
class Spyder():
    """Crawler that downloads songs for the artists listed on music.taihe.com.

    The crawl is a chain of calls, each step feeding the next:

    ``headers`` -> ``first_request`` (artist index page)
    -> ``second_request`` (one artist's page)
    -> ``load_music`` (song API lookup)
    -> ``data_request`` (download into ``<artist>/<song>.mp3``).
    """

    BASE_URL = 'http://music.taihe.com'
    SONG_API_URL = 'http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&callback=jQuery17209000847668843108_1563975246089&songid='
    # Seconds to wait for any single HTTP request; without a timeout a
    # stalled connection would block the crawl forever.
    TIMEOUT = 10

    # Patterns are compiled once instead of on every call.
    _SONG_LINK_RE = re.compile('<a href="/song/(.*?)" class="songlist-songname namelink overdd " ')
    _SHOW_LINK_RE = re.compile('{"show_link":"(.*?)",')
    # Characters that are not allowed in file names on common file systems.
    _UNSAFE_CHARS_RE = re.compile(r'[\\/:*?"<>|]')

    @staticmethod
    def _safe_name(name):
        """Return *name* with characters that are invalid in file names replaced by ``_``."""
        cleaned = Spyder._UNSAFE_CHARS_RE.sub('_', name).strip(' .')
        return cleaned or '_'

    def headers(self):
        """Build the HTTP request headers and start the crawl."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0'
        }
        self.first_request(headers)

    def first_request(self, headers):
        """Fetch the artist index page and crawl every artist found on it.

        Args:
            headers: HTTP headers sent with every request.
        """
        url = self.BASE_URL + '/artist'
        response = requests.get(url, headers=headers, timeout=self.TIMEOUT)
        html = etree.HTML(response.content.decode())
        art_name_list = html.xpath('//dt[@class="cover-img"]/a/img/@title')
        art_link_list = html.xpath('//dt[@class="cover-img"]/a/@href')
        for art_name, art_link in zip(art_name_list, art_link_list):
            # Create the artist's folder if it does not exist yet.
            os.makedirs(self._safe_name(art_name), exist_ok=True)
            self.second_request(art_name, art_link, headers)

    def second_request(self, art_name, art_link, headers):
        """Fetch one artist's page and process each song listed on it.

        Args:
            art_name: Artist name, used as the download folder name.
            art_link: Site-relative link to the artist's page.
            headers: HTTP headers sent with every request.
        """
        response = requests.get(self.BASE_URL + art_link, headers=headers, timeout=self.TIMEOUT)
        # Decode once and reuse the text for both the XPath and regex passes.
        page_text = response.content.decode()
        html = etree.HTML(page_text)
        song_name_list = html.xpath('//span[@class="songname"]/a/@title')
        song_link_list = self._SONG_LINK_RE.findall(page_text)
        # NOTE(review): names and ids come from two independent extractions
        # and are paired by position; confirm they always line up on the page.
        for song_name, song_link in zip(song_name_list, song_link_list):
            self.load_music(song_name, song_link, art_name, headers)

    def load_music(self, song_name, song_link, art_name, headers):
        """Look up a song's download link via the song API and download it.

        Args:
            song_name: Song title, used as the output file name.
            song_link: Song id appended to the API URL.
            art_name: Artist name, used as the download folder name.
            headers: HTTP headers sent with every request.
        """
        url = self.SONG_API_URL + song_link
        response = requests.get(url, headers=headers, timeout=self.TIMEOUT).content.decode()
        # Take only the first match: joining several matches would yield an
        # invalid URL, and no match must not be turned into a request for ''.
        match = self._SHOW_LINK_RE.search(response)
        # The API response escapes slashes as "\/"; strip the backslashes.
        link = match.group(1).replace('\\', '') if match else ''
        self.data_request(link, song_name, art_name, headers)

    def data_request(self, link, song_name, art_name, headers):
        """Download the audio at *link* and save it as ``<art_name>/<song_name>.mp3``.

        Songs without a usable link are skipped instead of aborting the crawl.

        Args:
            link: Direct URL of the audio file; empty or ``None`` means "skip".
            song_name: Song title, used as the output file name.
            art_name: Artist name, used as the download folder name.
            headers: HTTP headers sent with the request.
        """
        if not link:
            print('未找到下载链接,已跳过:', song_name)
            return
        folder = self._safe_name(art_name)
        # Make sure the target folder exists even when called directly.
        os.makedirs(folder, exist_ok=True)
        # os.path.join keeps the path portable (the separator is OS-specific).
        file_name = os.path.join(folder, self._safe_name(song_name) + '.mp3')
        print('正在下载的歌曲是:', song_name, '作者:', art_name)
        response = requests.get(link, headers=headers, timeout=self.TIMEOUT).content
        with open(file_name, 'wb') as f:
            f.write(response)
if __name__ == '__main__':
    # Run the crawl only when executed as a script, so importing this module
    # (e.g. to reuse Spyder) does not trigger network requests and downloads.
    spyder = Spyder()
    spyder.headers()