#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import re
import time
# Site home page; the thread links found on it are relative to this URL.
BASE_URL = "https://www.hifini.com/"

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10

# Relative links to thread pages, e.g. "thread-62290.htm". The id may have
# any number of digits and the dot before "htm" is matched literally.
THREAD_LINK_RE = re.compile(r"thread-\d+\.htm")

# (field name, pattern) pairs in the order the fields are printed.
# Each field appears in the thread page source as  name: 'value'  with
# optional whitespace after the colon; url/title/author are followed by a
# comma, pic is not.  re.S lets a value span several lines.
SONG_FIELDS = (
    ("url", re.compile(r" url:\s*'(.*?)',", re.S)),
    ("title", re.compile(r" title:\s*'(.*?)',", re.S)),
    ("pic", re.compile(r" pic:\s*'(.*?)'", re.S)),
    ("author", re.compile(r" author:\s*'(.*?)',", re.S)),
)


def find_thread_links(page_html):
    """Return the distinct thread links found in *page_html*, sorted.

    Sorting makes the crawl order (and therefore the output) reproducible;
    iterating a bare set would not be.
    """
    return sorted(set(THREAD_LINK_RE.findall(page_html)))


def parse_song_page(page_html):
    """Extract the song fields from the HTML of one thread page.

    Returns a dict with the keys "url", "title", "pic" and "author".
    Each value is the concatenation of every match for that field, or an
    empty string when the page does not contain the field.
    """
    return dict(
        (name, "".join(pattern.findall(page_html)))
        for name, pattern in SONG_FIELDS
    )


def fetch_text(session, page_url):
    """Download *page_url* with *session* and return the decoded body.

    Raises requests.RequestException on connection problems, timeouts and
    HTTP error statuses.
    """
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def main():
    """Print url, title, picture and author for every thread on the home page.

    A failure to load the home page is fatal and propagates to the caller.
    A failure on an individual thread page is reported on stderr and the
    crawl continues with the next link.
    """
    import sys

    # One session for the whole crawl so the connection is reused.
    with requests.Session() as session:
        home_html = fetch_text(session, BASE_URL)
        for link in find_thread_links(home_html):
            page_url = BASE_URL + link
            try:
                page_html = fetch_text(session, page_url)
            except requests.RequestException as err:
                sys.stderr.write("skipped %s: %s\n" % (page_url, err))
                continue
            song = parse_song_page(page_html)
            for name, _ in SONG_FIELDS:
                print(song[name])
            print("")  # blank line between songs


if __name__ == "__main__":
    main()