import requests, re, time, os
import urllib.parse
import urllib.request
from urllib.parse import quote
from lxml import etree
'''
Target site: 电影淘淘  url = https://www.dytt.com/
Search URL:  https://www.dytt.com/vodsearch/-------------.html?wd={name}
(the keyword in wd must be URL-encoded; see the quote() note just below)
'''
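# Illustrative note (not part of the original script): the keyword placed in the
# wd parameter has to be percent-encoded, which is what urllib.parse.quote does, e.g.
#   quote("流浪地球")  ->  '%E6%B5%81%E6%B5%AA%E5%9C%B0%E7%90%83'
#   f'https://www.dytt.com/vodsearch/-------------.html?wd={quote("流浪地球")}'
# main() below builds the search URL in this same way.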
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
'Referer': 'https://www.dytt.com/',
}
# 1. Search: return the detail-page URL of the first search result
def sou_suo(url):
    i = 0
    while True:
        try:
            rep = requests.get(url=url, headers=headers, timeout=30)
            if rep.status_code == 200:
                tree = etree.HTML(rep.text)
                # Note: browser-inserted <tbody> tags are not in the raw HTML,
                # so keeping tbody out of the XPath avoids empty matches
                next_url = 'https://www.dytt.com' + tree.xpath('//div[@class="tableListMain"]//tr/td[1]/a/@href')[0]
                print(next_url)
                return next_url
        except Exception:
            pass
        # a non-200 status or a failed parse both count as one failed attempt
        i += 1
        time.sleep(2)
        if i > 50:
            print("No response - please check that the movie/series name is correct")
            break
        print(f"Retrying, attempt {i}")
# 2. Collect the play-page links: a series yields a list of episodes, a movie a single play page
def moive_urls(url):
    i = 0
    while True:
        try:
            rep = requests.get(url=url, headers=headers, timeout=30)
            if rep.status_code == 200:
                tree = etree.HTML(rep.text)
                li_lis = tree.xpath('//div[@class="bofangList"]//li')  # again, keep tbody out of the XPath
                print(len(li_lis))  # number of episodes found
                dics = []
                for li in li_lis:
                    moive_url = 'https://www.dytt.com' + li.xpath('./a/@href')[0]
                    moive_title = li.xpath('./a/text()')[0]
                    dic = {
                        'moive_title': moive_title,
                        'moive_url': moive_url,
                    }
                    dics.append(dic)
                    print(dic)
                return dics
        except Exception:
            pass
        i += 1
        time.sleep(2)
        if i > 20:
            print("No response - please check that the movie/series name is correct")
            break
        print(f"Retrying, attempt {i}")
# 3. Let the user pick an episode from the list
def xuan_zhe(dics, name):
    for n, d in enumerate(dics):
        print(n, d['moive_title'])
    index = int(input("Enter the number of the episode to download: "))  # convert to int before indexing
    video_url = dics[index]['moive_url']
    video_name = dics[index]['moive_title']
    print(name + video_name, "requesting the play page")
    dic2 = {
        'video_url': video_url,
        'video_name': video_name
    }
    return dic2
#-----------------------------------------------------------------------------------------------------------------------
# url = 'https://www.dytt.com/vod-play-id-155223-src-3-num-1.html'
"""
获取m3u8(有些视频有一个m3u8,有些有2个,有些加密,有些没有加密)
url的坑主要斜杠最好左斜杠/ 右斜杠\会被当成转义 不能获取响应
"""
def get_response(url):
    i = 0
    while True:  # keep retrying until a response is obtained
        try:
            rep = requests.get(url=url, headers=headers, timeout=30)
            rep.encoding = rep.apparent_encoding
            if rep.status_code == 200:
                return rep
            # Alternative using urllib:
            # req = urllib.request.Request(url, headers=headers)
            # rep = urllib.request.urlopen(req, timeout=2)
            # print(rep.read())                  # raw response body (bytes)
            # print(rep.read().decode("utf-8"))  # decoded response body
            # return rep
        except Exception:
            pass
        i += 1
        time.sleep(2)
        if i > 50:
            print("No response - please try again")
            break
        print(f"Retrying, attempt {i}")
def get_m3u8(response, path):  # extract the m3u8 playlist(s) from the play page
    # the URL in the page JSON is escaped with backslashes, so strip them first
    start_m3u8_url = re.findall(r'"url":"(?P<start_m3u8_url>.*?)","url_next"', response.text)[0].replace("\\", "")
    print(start_m3u8_url)
    rep_start_m3u8 = get_response(start_m3u8_url)
    path1 = path + "/start_m3u8.txt"
    with open(path1, 'w') as f:
        f.write(rep_start_m3u8.text)
    with open(path1, "r") as f1:
        urls = []
        for line in f1:
            line = line.strip()
            if line.startswith("#"):
                continue
            urls.append(line)
    if len(urls) < 5:
        # only a few entries: this is a top-level playlist pointing at a second m3u8
        second_m3u8_url = urllib.parse.urljoin(start_m3u8_url, urls[0])
        print(second_m3u8_url)
        rep_second_m3u8 = get_response(second_m3u8_url)
        second_urls = []
        path2 = path + "/second_m3u8.txt"
        with open(path2, 'w') as f:
            f.write(rep_second_m3u8.text)
        with open(path2, "r") as f1:
            for line2 in f1:
                line2 = line2.strip()
                if line2.startswith("#"):
                    continue
                # segment paths may be relative, so resolve them against the m3u8 URL
                second_urls.append(urllib.parse.urljoin(second_m3u8_url, line2))
                print(line2)
        return second_urls
    else:
        # the first m3u8 already lists the .ts segments
        return [urllib.parse.urljoin(start_m3u8_url, u) for u in urls]
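# --- Hedged sketch (not part of the original script) ---------------------------
# The module docstring above notes that some streams are AES-encrypted; get_m3u8()
# does not handle that case. The helper below shows one way an encrypted segment
# could be decrypted, assuming the playlist contains an "#EXT-X-KEY:METHOD=AES-128"
# line and that the pycryptodome package is available. The function name, the regex
# and the zero IV are illustrative assumptions, not something the target site confirms.
def decrypt_ts(m3u8_text, m3u8_url, ts_bytes):
    key_match = re.search(r'#EXT-X-KEY:METHOD=AES-128,URI="(.*?)"', m3u8_text)
    if not key_match:
        return ts_bytes  # playlist is not encrypted, return the segment unchanged
    from Crypto.Cipher import AES  # pip install pycryptodome (assumed dependency)
    key_url = urllib.parse.urljoin(m3u8_url, key_match.group(1))
    key = get_response(key_url).content  # the 16-byte AES key referenced by the playlist
    cipher = AES.new(key, AES.MODE_CBC, iv=b"\x00" * 16)  # assume a zero IV when none is given
    return cipher.decrypt(ts_bytes)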
def movie_down(url, path):  # download a single .ts segment
    ts_name = url.split("/")[-1]
    rep3 = get_response(url)
    print(ts_name, "downloading")
    path3 = path + "/" + ts_name
    with open(path3, "wb") as f3:
        f3.write(rep3.content)
    print(ts_name, "download finished")
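# --- Hedged sketch (not part of the original script) ---------------------------
# The script downloads the .ts segments but never joins them. One simple way to get
# a single playable file is to concatenate the segments in playlist order; this
# helper and its output file name are illustrative and are not called from main().
def merge_ts(urls, path, out_name="merged.ts"):
    with open(path + "/" + out_name, "wb") as out:
        for u in urls:
            seg = path + "/" + u.split("/")[-1]  # same naming scheme movie_down() uses
            with open(seg, "rb") as f:
                out.write(f.read())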
def main():
    name = input("Enter the name of the movie or series to download: ")
    start_url = f'https://www.dytt.com/vodsearch/-------------.html?wd={quote(name)}'  # keyword must be URL-encoded
    print(start_url)
    next_url = sou_suo(start_url)
    dics = moive_urls(next_url)
    dic2 = xuan_zhe(dics, name)
    path = name + "/" + dic2['video_name']
    if not os.path.exists(path):
        os.makedirs(path)  # create the series folder and the episode folder in one go
    response = get_response(dic2['video_url'])
    urls = get_m3u8(response, path)
    for ts_url in urls:
        movie_down(ts_url, path)
if __name__ == '__main__':
    main()